From 8e8b4def4f5fcfeb44abf9c6dcaac20111345a85 Mon Sep 17 00:00:00 2001
From: Victorien Elvinger <victorien@elvinger.fr>
Date: Sun, 8 Feb 2026 23:25:18 +0100
Subject: [PATCH 01/78] impl Copy for FromBytesUntilNulError

---
 library/core/src/ffi/c_str.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs
index 621277179bb38..cd744e3e141d4 100644
--- a/library/core/src/ffi/c_str.rs
+++ b/library/core/src/ffi/c_str.rs
@@ -155,7 +155,7 @@ impl Error for FromBytesWithNulError {}
 /// within the slice.
 ///
 /// This error is created by the [`CStr::from_bytes_until_nul`] method.
-#[derive(Clone, PartialEq, Eq, Debug)]
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
 #[stable(feature = "cstr_from_bytes_until_nul", since = "1.69.0")]
 pub struct FromBytesUntilNulError(());
 

From cff9a882a9112298382b7e2e06de54018b4a181e Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Thu, 30 Apr 2026 03:49:51 +0530
Subject: [PATCH 02/78] some changes in argument loading in intrinsic-test

revert the offset loading, instead load in a wrapping fashion
---
 .../intrinsic-test/src/common/argument.rs     | 26 +++++++++++--------
 .../crates/intrinsic-test/src/common/gen_c.rs |  7 -----
 .../intrinsic-test/src/common/gen_rust.rs     |  6 -----
 3 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/library/stdarch/crates/intrinsic-test/src/common/argument.rs b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
index 413d5314d3180..8ae9869db0621 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/argument.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
@@ -1,5 +1,6 @@
 use super::cli::Language;
 use super::constraint::Constraint;
+use super::gen_rust::PASSES;
 use super::indentation::Indentation;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
@@ -178,7 +179,7 @@ where
             .enumerate()
             .map(|(idx, arg)| {
                 format!(
-                    "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i+{idx}]));\n",
+                    "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[(i+{idx}) % {PASSES}]));\n",
                     ty = arg.to_c_type(),
                     name = arg.generate_name(),
                     load = if arg.is_simd() {
@@ -199,17 +200,20 @@ where
             .filter(|&arg| !arg.has_constraint())
             .enumerate()
             .map(|(idx, arg)| {
-                let load = if arg.is_simd() {
-                    arg.ty.get_load_function(Language::Rust)
+                if arg.is_simd() {
+                    format!(
+                        "{indentation}let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n",
+                        name = arg.generate_name(),
+                        vals_name = arg.rust_vals_array_name(),
+                        load = arg.ty.get_load_function(Language::Rust),
+                    )
                 } else {
-                    "*".to_string()
-                };
-                let typecast = if load.len() > 2 { "as _" } else { "" };
-                format!(
-                    "{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i+{idx}){typecast});\n",
-                    name = arg.generate_name(),
-                    vals_name = arg.rust_vals_array_name(),
-                )
+                    format!(
+                        "{indentation}let {name} = {vals_name}[(i+{idx}) % {PASSES}];\n",
+                        name = arg.generate_name(),
+                        vals_name = arg.rust_vals_array_name(),
+                    )
+                }
             })
             .collect()
     }
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
index 12b5a600d5aef..a95b4c36b7bf0 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@@ -25,13 +25,6 @@ pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
     passes: u32,
 ) -> std::io::Result<()> {
     let body_indentation = indentation.nested();
-    // Successive arguments are offset increasingly from their value array start
-    let passes = passes + 1
-        - intrinsic
-            .arguments
-            .iter()
-            .filter(|&arg| !arg.has_constraint())
-            .count() as u32;
     writeln!(
         w,
         "{indentation}for (int i=0; i<{passes}; i++) {{\n\
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
index 5a0bc7b4d4b97..82b97701bb14a 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
@@ -214,12 +214,6 @@ pub fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
     passes: u32,
 ) -> std::io::Result<()> {
     let intrinsic_name = &intrinsic.name;
-    let passes = passes + 1
-        - intrinsic
-            .arguments
-            .iter()
-            .filter(|&arg| !arg.has_constraint())
-            .count() as u32;
 
     // Each function (and each specialization) has its own type. Erase that type with a cast.
     let mut coerce = String::from("unsafe fn(");

From af754dcaf3e79d68cafe7a72c12cd973af199bc2 Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Mon, 20 Apr 2026 16:41:06 +0100
Subject: [PATCH 03/78] gen-arm: resolve outdated `big_endian_inverse` todo

Earlier patches added `big_endian_inverse` and used `transmute` instead
of using `vreinterpret` due to an LLVM bug that has since been resolved
---
 .../src/arm_shared/neon/generated.rs          | 488 +-----------------
 .../spec/neon/arm_shared.spec.yml             |  36 +-
 2 files changed, 36 insertions(+), 488 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 84c1a91adf79f..ce9b236ff551e 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -9573,35 +9573,6 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sdot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = transmute(c);
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_s32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
@@ -9620,49 +9591,15 @@ pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) ->
 )]
 pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
+    let c: int32x2_t = vreinterpret_s32_s8(c);
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int32x2_t = transmute(c);
         let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        let ret_val: int32x2_t = vdot_s32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sdot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = transmute(c);
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_s32(a, b, transmute(c))
+        vdot_s32(a, b, vreinterpret_s8_s32(c))
     }
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
@@ -9681,50 +9618,16 @@ pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t)
 )]
 pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
+    let c: int32x2_t = vreinterpret_s32_s8(c);
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let b: int8x16_t =
-            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int32x2_t = transmute(c);
         let c: int32x4_t =
             simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        let ret_val: int32x4_t = vdotq_s32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(udot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: uint32x2_t = transmute(c);
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_u32(a, b, transmute(c))
+        vdotq_s32(a, b, vreinterpretq_s8_s32(c))
     }
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
@@ -9743,20 +9646,15 @@ pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t)
 )]
 pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 1);
+    let c: uint32x2_t = vreinterpret_u32_u8(c);
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint32x2_t = transmute(c);
         let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        let ret_val: uint32x2_t = vdot_u32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        vdot_u32(a, b, vreinterpret_u8_u32(c))
     }
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
@@ -9775,51 +9673,16 @@ pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t)
 )]
 pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 1);
+    let c: uint32x2_t = vreinterpret_u32_u8(c);
     unsafe {
-        let c: uint32x2_t = transmute(c);
         let c: uint32x4_t =
             simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_u32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(udot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let b: uint8x16_t =
-            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint32x2_t = transmute(c);
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        let ret_val: uint32x4_t = vdotq_u32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        vdotq_u32(a, b, vreinterpretq_u8_u32(c))
     }
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
@@ -9831,42 +9694,15 @@ pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_
 #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
 pub fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
+    let c: int32x4_t = vreinterpretq_s32_s8(c);
     unsafe {
-        let c: int32x4_t = transmute(c);
         let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_s32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sdot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int8x16_t =
-            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int32x4_t = transmute(c);
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        let ret_val: int32x2_t = vdot_s32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        vdot_s32(a, b, vreinterpret_s8_s32(c))
     }
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
@@ -9878,45 +9714,16 @@ pub fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t)
 #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
 pub fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
+    let c: int32x4_t = vreinterpretq_s32_s8(c);
     unsafe {
-        let c: int32x4_t = transmute(c);
         let c: int32x4_t =
             simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_s32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sdot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let b: int8x16_t =
-            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int8x16_t =
-            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int32x4_t = transmute(c);
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        let ret_val: int32x4_t = vdotq_s32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        vdotq_s32(a, b, vreinterpretq_s8_s32(c))
     }
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
@@ -9928,42 +9735,15 @@ pub fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t
 #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
 pub fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 2);
+    let c: uint32x4_t = vreinterpretq_u32_u8(c);
     unsafe {
-        let c: uint32x4_t = transmute(c);
         let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
         vdot_u32(a, b, transmute(c))
     }
 }
 #[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(udot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x16_t =
-            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint32x4_t = transmute(c);
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        let ret_val: uint32x2_t = vdot_u32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
@@ -9975,41 +9755,13 @@ pub fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_
 #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
 pub fn vdotq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
+    let c: uint32x4_t = vreinterpretq_u32_u8(c);
     unsafe {
-        let c: uint32x4_t = transmute(c);
         let c: uint32x4_t =
             simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
         vdotq_u32(a, b, transmute(c))
     }
 }
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(udot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub fn vdotq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let b: uint8x16_t =
-            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x16_t =
-            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint32x4_t = transmute(c);
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        let ret_val: uint32x4_t = vdotq_u32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
 #[doc = "Dot product arithmetic (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"]
 #[inline]
@@ -70469,7 +70221,6 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
 #[doc = "Dot product index form with signed and unsigned integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,i8mm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
@@ -70488,48 +70239,15 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
 )]
 pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
+    let c: uint32x2_t = vreinterpret_u32_u8(c);
     unsafe {
-        let c: uint32x2_t = transmute(c);
         let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, transmute(c), b)
-    }
-}
-#[doc = "Dot product index form with signed and unsigned integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sudot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint32x2_t = transmute(c);
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        let ret_val: int32x2_t = vusdot_s32(a, transmute(c), b);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        vusdot_s32(a, vreinterpret_u8_u32(c), b)
     }
 }
 #[doc = "Dot product index form with signed and unsigned integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,i8mm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
@@ -70548,45 +70266,11 @@ pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t)
 )]
 pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
+    let c: uint32x2_t = vreinterpret_u32_u8(c);
     unsafe {
-        let c: uint32x2_t = transmute(c);
         let c: uint32x4_t =
             simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, transmute(c), b)
-    }
-}
-#[doc = "Dot product index form with signed and unsigned integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sudot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let b: int8x16_t =
-            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint32x2_t = transmute(c);
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        let ret_val: int32x4_t = vusdotq_s32(a, transmute(c), b);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
     }
 }
 #[doc = "Dot product index form with signed and unsigned integers"]
@@ -72512,35 +72196,6 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[doc = "Dot product index form with unsigned and signed integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usdot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = transmute(c);
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product index form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,i8mm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
@@ -72559,20 +72214,15 @@ pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t)
 )]
 pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
+    let c: int32x2_t = vreinterpret_s32_s8(c);
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int32x2_t = transmute(c);
         let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        let ret_val: int32x2_t = vusdot_s32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        vusdot_s32(a, b, vreinterpret_s8_s32(c))
     }
 }
 #[doc = "Dot product index form with unsigned and signed integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,i8mm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
@@ -72591,51 +72241,16 @@ pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t)
 )]
 pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
+    let c: int32x2_t = vreinterpret_s32_s8(c);
     unsafe {
-        let c: int32x2_t = transmute(c);
         let c: int32x4_t =
             simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product index form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usdot, LANE = 0)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let b: uint8x16_t =
-            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int32x2_t = transmute(c);
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        let ret_val: int32x4_t = vusdotq_s32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
     }
 }
 #[doc = "Dot product index form with unsigned and signed integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,i8mm")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))]
@@ -72650,61 +72265,12 @@ pub fn vusdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x16_
     unsafe {
         let c: int32x4_t = transmute(c);
         let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product index form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usdot, LANE = 3)
-)]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub fn vusdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int8x16_t =
-            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int32x4_t = transmute(c);
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        let ret_val: int32x2_t = vusdot_s32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        vusdot_s32(a, b, vreinterpret_s8_s32(c))
     }
 }
 #[doc = "Dot product index form with unsigned and signed integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usdot, LANE = 3)
-)]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t = transmute(c);
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product index form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,i8mm")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))]
@@ -72717,16 +72283,10 @@ pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x1
 pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let b: uint8x16_t =
-            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int8x16_t =
-            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
         let c: int32x4_t = transmute(c);
         let c: int32x4_t =
             simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        let ret_val: int32x4_t = vusdotq_s32(a, b, transmute(c));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
     }
 }
 #[doc = "Dot product vector form with unsigned and signed integers"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 7b8ddf43742d2..bfc8033fdd8c9 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -6168,7 +6168,6 @@ intrinsics:
       - *neon-unstable-i8mm
       - *neon-cfg-arm-unstable
     static_defs: ["const LANE: i32"]
-    big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
     safety: safe
     types:
       - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]','']
@@ -6178,12 +6177,12 @@ intrinsics:
       - Let:
           - c
           - int32x2_t
-          - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_s32_s8', [c]]
+          - FnCall: ['vreinterpret_s32_s8', [c]]
       - Let:
           - c
           - "{type[1]}"
           - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
-      - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] #'vreinterpret{type[4]}_s8_s32'
+      - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]]
 
   - name: "vsudot{neon_type[0].lane_nox}"
     doc: "Dot product index form with signed and unsigned integers"
@@ -6198,7 +6197,6 @@ intrinsics:
       - *neon-unstable-i8mm
       - *neon-cfg-arm-unstable
     static_defs: ["const LANE: i32"]
-    big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
     safety: safe
     types:
       - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,'']
@@ -6208,7 +6206,7 @@ intrinsics:
       - Let:
           - c
           - uint32x2_t
-          - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_u32_u8', [c]]
+          - FnCall: ['vreinterpret_u32_u8', [c]]
       - Let:
           - c
           - "{type[4]}"
@@ -6216,7 +6214,7 @@ intrinsics:
       - FnCall: 
           - "vusdot{neon_type[0].no}"
           - - a
-            - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_u8_u32', [c]]
+            - FnCall: ['vreinterpret{type[5]}_u8_u32', [c]]
             - b
 
   - name: "vmul{neon_type[1].no}"
@@ -6968,7 +6966,6 @@ intrinsics:
     doc: "Dot product index form with unsigned and signed integers"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
-    big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
     attr:
       - *neon-v8
       - *neon-i8mm
@@ -6985,8 +6982,7 @@ intrinsics:
       - FnCall: [static_assert_uimm_bits!, [LANE, '2']]
       - Let: [c, int32x4_t, {FnCall: [transmute, [c]]}]
       - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
-      - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: [transmute, [c]]}]]
-      #- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] 
+      - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] 
 
   - name: "vsudot{neon_type[0].laneq_nox}"
     doc: "Dot product index form with signed and unsigned integers"
@@ -7025,7 +7021,6 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     static_defs: ["const LANE: i32"]
-    big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
     attr:
       - *neon-v8
       - FnCall: [target_feature, ['enable = "neon,dotprod"']]
@@ -7042,8 +7037,7 @@ intrinsics:
       - Let:
           - c
           - "{neon_type[3]}"
-          - FnCall: [transmute, [c]]
-          #- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
+          - FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
       - Let:
           - c
           - "{neon_type[0]}"
@@ -7052,15 +7046,13 @@ intrinsics:
           - "vdot{neon_type[0].no}"
           - - a
             - b
-            - FnCall: [transmute, [c]]
-            #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
+            - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
 
   - name: "vdot{neon_type[0].laneq_nox}"
     doc: Dot product arithmetic (indexed)
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     static_defs: ["const LANE: i32"]
-    big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
     attr:
       - *neon-v8
       - FnCall: [target_feature, ['enable = "neon,dotprod"']]
@@ -7077,8 +7069,7 @@ intrinsics:
       - Let:
           - c
           - "{neon_type[3]}"
-          - FnCall: [transmute, [c]]
-          #- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
+          - FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
       - Let:
           - c
           - "{neon_type[0]}"
@@ -7088,7 +7079,6 @@ intrinsics:
           - - a
             - b
             - FnCall: [transmute, [c]]
-            #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
 
   - name: "vdot{neon_type[0].no}"
     doc: Dot product arithmetic (vector)
@@ -7151,7 +7141,6 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
       - *neon-cfg-arm-unstable
-    big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
     safety: safe
     types:
       - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]','']
@@ -7161,7 +7150,7 @@ intrinsics:
       - Let:
           - c
           - "{neon_type[3]}"
-          - FnCall: [transmute, [c]]
+          - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
       - Let:
           - c
           - "{neon_type[0]}"
@@ -7170,7 +7159,7 @@ intrinsics:
           - "vdot{neon_type[0].no}"
           - - a
             - b
-            - FnCall: [transmute, [c]]
+            - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
 
   - name: "vdot{neon_type[0].lane_nox}"
     doc: Dot product arithmetic (indexed)
@@ -7186,7 +7175,6 @@ intrinsics:
       - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
       - *neon-cfg-arm-unstable
     safety: safe
-    big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
     types:
       - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]','']
       - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
@@ -7195,7 +7183,7 @@ intrinsics:
       - Let:
           - c
           - "{neon_type[3]}"
-          - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
+          - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
       - Let:
           - c
           - "{neon_type[0]}"
@@ -7204,7 +7192,7 @@ intrinsics:
           - "vdot{neon_type[0].no}"
           - - a
             - b
-            - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
+            - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
 
   - name: "vmax{neon_type.no}"
     doc: Maximum (vector)

From 51f8123fa7a1e916c14e407c0084186dd3d6c33b Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Mon, 20 Apr 2026 14:21:22 +0100
Subject: [PATCH 04/78] gen-arm: use `neon-stable` anchor throughout

Instead of duplicating the stability attribute, re-use the `neon-stable`
anchor through
---
 .../spec/neon/aarch64.spec.yml                | 620 +++++++++---------
 1 file changed, 306 insertions(+), 314 deletions(-)

diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index e37f9f124be4c..e40775d68a70b 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -245,11 +245,7 @@ intrinsics:
     doc: Signed Absolute difference Long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr:
-      - FnCall:
-          - stable
-          - - 'feature = "neon_intrinsics"'
-            - 'since = "1.59.0"'
+    attr: [*neon-stable]
     assert_instr: [sabdl2]
     safety: safe
     types:
@@ -288,11 +284,7 @@ intrinsics:
     doc: Signed Absolute difference Long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr:
-      - FnCall:
-          - stable
-          - - 'feature = "neon_intrinsics"'
-            - 'since = "1.59.0"'
+    attr: [*neon-stable]
     assert_instr: [sabdl2]
     safety: safe
     types:
@@ -333,7 +325,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint64x1_t, uint64x1_t]
@@ -351,7 +343,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -365,7 +357,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -407,7 +399,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64", "s64"]
@@ -426,7 +418,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)']
@@ -444,7 +436,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tst]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64", "s64"]
@@ -463,7 +455,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_s32", "i32", "u32"]
@@ -481,7 +473,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["b_s8", "i8", "u8", "s8"]
@@ -501,7 +493,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fabs]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x1_t
@@ -515,7 +507,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, uint64x1_t]
@@ -529,7 +521,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - uint64x1_t
@@ -543,7 +535,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -557,7 +549,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -599,7 +591,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, uint64x1_t]
@@ -613,7 +605,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, uint64x1_t]
@@ -627,7 +619,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -641,7 +633,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -683,7 +675,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, uint64x1_t]
@@ -697,7 +689,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -718,7 +710,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64"]
@@ -735,7 +727,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmle]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -759,7 +751,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmle]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
@@ -779,7 +771,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -818,7 +810,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmlt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -842,7 +834,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmlt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
@@ -862,7 +854,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -901,7 +893,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [asr]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64"]
@@ -918,7 +910,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -936,7 +928,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32", i32]
@@ -975,7 +967,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -993,7 +985,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32", i32]
@@ -1033,7 +1025,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -1047,7 +1039,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -1076,7 +1068,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -1090,7 +1082,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -1119,7 +1111,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, float64x1_t]
@@ -1133,7 +1125,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "i32", "f32", s32]
@@ -1147,7 +1139,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint64x1_t, float64x1_t]
@@ -1161,7 +1153,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u32", "f32", "s_f32"]
@@ -1176,7 +1168,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1314,7 +1306,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1340,7 +1332,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1365,7 +1357,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1389,7 +1381,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "i32", "s_s32_f32", "32"]
@@ -1403,7 +1395,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", "s_u32_f32"]
@@ -1488,7 +1480,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, float64x2_t]
@@ -1501,7 +1493,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x4_t, float64x2_t]
@@ -1556,7 +1548,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x2_t, float32x2_t]
@@ -1569,7 +1561,7 @@ intrinsics:
     return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, float64x2_t, float32x4_t]
@@ -1586,7 +1578,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x2_t, float32x2_t]
@@ -1603,7 +1595,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f64", "f32"]
@@ -1621,7 +1613,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, float64x2_t, float32x4_t]
@@ -1639,7 +1631,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1662,7 +1654,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1756,7 +1748,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1779,7 +1771,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1801,7 +1793,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, int32x2_t, _s32_f32]
@@ -1842,7 +1834,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "i32", 's_s32_f32']
@@ -1935,7 +1927,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", 's_u32_f32']
@@ -1953,7 +1945,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, int32x2_t]
@@ -1973,7 +1965,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "i32", 's_s32_f32']
@@ -2106,7 +2098,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, int32x2_t]
@@ -2169,7 +2161,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "i32", 's_s32_f32']
@@ -2187,7 +2179,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, int32x2_t]
@@ -2207,7 +2199,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "i32", 's_s32_f32']
@@ -2225,7 +2217,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t]
@@ -2245,7 +2237,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", 's_u32_f32']
@@ -2263,7 +2255,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t]
@@ -2283,7 +2275,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", s_u32_f32]
@@ -2301,7 +2293,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t]
@@ -2321,7 +2313,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", s_u32_f32, 'i32']
@@ -2458,7 +2450,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2475,7 +2467,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2492,7 +2484,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2509,7 +2501,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2527,7 +2519,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2546,7 +2538,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2567,7 +2559,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 4']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2627,7 +2619,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 8']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2645,7 +2637,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2666,7 +2658,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ext, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2682,7 +2674,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x1_t
@@ -2696,7 +2688,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -2713,7 +2705,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -2736,7 +2728,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -2759,7 +2751,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -6869,7 +6861,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, "f32"]
@@ -6887,7 +6879,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmv]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x4_t, "f32"]
@@ -7102,7 +7094,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i16", "i32"]
@@ -7117,7 +7109,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i32", "i64"]
@@ -7134,7 +7126,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]']
@@ -7150,7 +7142,7 @@ intrinsics:
     return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int16x8_t, "i16", int32x4_t, int16x4_t, '[4, 5, 6, 7]']
@@ -7167,7 +7159,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7185,7 +7177,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, N = 4]]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7202,7 +7194,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7219,7 +7211,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7238,7 +7230,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7256,7 +7248,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7274,7 +7266,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 4']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7291,7 +7283,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7307,7 +7299,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16]
@@ -7324,7 +7316,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7342,7 +7334,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i32", "i16", "s16"]
@@ -7356,7 +7348,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "i32", "i32", "i64"]
@@ -7371,7 +7363,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -7390,7 +7382,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7406,7 +7398,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7421,7 +7413,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16]
@@ -7438,7 +7430,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7456,7 +7448,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i32", "i16"]
@@ -7470,7 +7462,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "i32", "i32", "i64"]
@@ -7485,7 +7477,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -7504,7 +7496,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7520,7 +7512,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7535,7 +7527,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i16", "i16", "i16", int16x4_t, 'h_s16']
@@ -7552,7 +7544,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7570,7 +7562,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7587,7 +7579,7 @@ intrinsics:
     return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
@@ -7602,7 +7594,7 @@ intrinsics:
     return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
@@ -7617,7 +7609,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i16", "i8", 'h_s16', s16]
@@ -7631,7 +7623,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u16", "u8", 'h_u16', 'u16']
@@ -7645,7 +7637,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "i32"]
@@ -7662,7 +7654,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u64", "u32"]
@@ -7679,7 +7671,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i16", "u8", 'h_s16', s16]
@@ -7694,7 +7686,7 @@ intrinsics:
     return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8_t, int16x8_t, uint8x16_t, s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
@@ -7709,7 +7701,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i16", 'h_s16', 's16']
@@ -7724,7 +7716,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh, LANE = 1]]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -7910,7 +7902,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['s_s32', "i32"]
@@ -7928,7 +7920,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i8", 'b_s8', int8x8_t, s8]
@@ -7944,7 +7936,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u32", "i32", 's_u32']
@@ -7962,7 +7954,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u8", "i8", "b_u8", uint8x8_t, int8x8_t, s8]
@@ -7979,7 +7971,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7998,7 +7990,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8016,7 +8008,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8035,7 +8027,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8059,7 +8051,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8086,7 +8078,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8109,7 +8101,7 @@ intrinsics:
     return_type: "{type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - i64
@@ -8126,7 +8118,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [b_s8, "i8", int8x8_t]
@@ -8149,7 +8141,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8172,7 +8164,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u64", "i64"]
@@ -8189,7 +8181,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [b_u8, "u8", "i8", uint8x8_t, int8x8_t]
@@ -8212,7 +8204,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8234,7 +8226,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8258,7 +8250,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8280,7 +8272,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8302,7 +8294,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8326,7 +8318,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8348,7 +8340,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8370,7 +8362,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8393,7 +8385,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8414,7 +8406,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [b_u8, "u8", "i8", s8]
@@ -8434,7 +8426,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [s_u32, "u32", "i32"]
@@ -8452,7 +8444,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float32x2_t
@@ -8499,7 +8491,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [_f64, float64x1_t, v1f64]
@@ -8517,7 +8509,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [s_f32, "f32"]
@@ -8556,7 +8548,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [_f64, float64x1_t, v1f64]
@@ -8574,7 +8566,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [s_f32, "f32"]
@@ -8613,7 +8605,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [_f64, float64x1_t, v1f64]
@@ -8631,7 +8623,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [s_f32, "f32"]
@@ -8670,7 +8662,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [s_f32, "f32"]
@@ -8802,7 +8794,7 @@ intrinsics:
     return_type: "{type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - "i64"
@@ -8819,7 +8811,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u64", "i64"]
@@ -8837,7 +8829,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8853,7 +8845,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8869,7 +8861,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8894,7 +8886,7 @@ intrinsics:
     attr:
       - *little-endian
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
@@ -8917,7 +8909,7 @@ intrinsics:
     attr:
       - *big-endian
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
@@ -8940,7 +8932,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
@@ -8965,7 +8957,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
@@ -8990,7 +8982,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
@@ -9019,7 +9011,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
@@ -9045,7 +9037,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
@@ -9066,7 +9058,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
@@ -9083,7 +9075,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u64", float64x1_t]
@@ -9097,7 +9089,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -9113,7 +9105,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -9128,7 +9120,7 @@ intrinsics:
     return_type: "{type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - "i64"
@@ -9146,7 +9138,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u64", "i64"]
@@ -9165,7 +9157,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -9184,7 +9176,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -9203,7 +9195,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -10011,7 +10003,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal2]]}]]
     safety: safe
     types:
@@ -10164,7 +10156,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float32x2_t
@@ -10182,7 +10174,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -10205,7 +10197,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -10229,7 +10221,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -10256,7 +10248,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10350,7 +10342,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10370,7 +10362,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10390,7 +10382,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10410,7 +10402,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10429,7 +10421,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10502,7 +10494,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x1_t
@@ -10516,7 +10508,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x2_t
@@ -10530,7 +10522,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x1_t
@@ -10545,7 +10537,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10565,7 +10557,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10581,7 +10573,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10597,7 +10589,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10613,7 +10605,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10631,7 +10623,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t, 'f32x2', 'f32x2::new(0.0, 0.0)']
@@ -10665,7 +10657,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", "s_f32"]
@@ -10704,7 +10696,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64", "s64"]
@@ -10722,7 +10714,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -10750,7 +10742,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8_t, uint8x8_t, u8x8, 'u8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -10774,7 +10766,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - uint64x1_t
@@ -10788,7 +10780,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -10830,7 +10822,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -10844,7 +10836,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["d_s64", "i64", "u64", s64]
@@ -10863,7 +10855,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - uint64x1_t
@@ -10877,7 +10869,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s64", "i64", "u64"]
@@ -10896,7 +10888,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint64x1_t, u64x1, 'u64x1::new(0)']
@@ -10912,7 +10904,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
@@ -10932,7 +10924,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -10972,7 +10964,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64"]
@@ -10987,7 +10979,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64", 's64']
@@ -11006,7 +10998,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -11030,7 +11022,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64"]
@@ -11047,7 +11039,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
@@ -11064,7 +11056,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -11103,7 +11095,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -11121,7 +11113,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, "f64"]
@@ -11139,7 +11131,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11160,7 +11152,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11179,7 +11171,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11196,7 +11188,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11217,7 +11209,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11277,7 +11269,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11318,7 +11310,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -11334,7 +11326,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int32x4_t, int16x8_t, "i16"]
@@ -11348,7 +11340,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint32x4_t, uint16x8_t, "u16"]
@@ -11363,7 +11355,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11386,7 +11378,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11408,7 +11400,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -11422,7 +11414,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", 's_f32']
@@ -11464,7 +11456,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -11481,7 +11473,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, "f64"]
@@ -11498,7 +11490,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x2_t, "f64"]
@@ -11539,7 +11531,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['s_f32', float32x2_t, "f32"]
@@ -11557,7 +11549,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['s_f32', float32x2_t, "f32"]
@@ -11575,7 +11567,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s64", "i64", "u64"]
@@ -11595,7 +11587,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11617,7 +11609,7 @@ intrinsics:
     arguments: ["a: {type[0]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]]
     safety: safe
     types:
@@ -11634,7 +11626,7 @@ intrinsics:
     arguments: ["a: {type[0]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]]
     safety: safe
     types:
@@ -11653,7 +11645,7 @@ intrinsics:
     return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int16x8_t, "i16", int32x4_t]
@@ -11670,7 +11662,7 @@ intrinsics:
     return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint16x8_t, "u16", uint32x4_t]
@@ -11688,7 +11680,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11710,7 +11702,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11731,7 +11723,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x1_t
@@ -11749,7 +11741,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32"]
@@ -11788,7 +11780,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float32x2_t
@@ -11808,7 +11800,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshlu, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -11831,7 +11823,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t]
@@ -11873,7 +11865,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, int64x1_t]
@@ -11964,7 +11956,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int32x4_t, int16x8_t, "i16"]
@@ -11982,7 +11974,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint32x4_t, uint16x8_t, "u16"]
@@ -12001,7 +11993,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -12020,7 +12012,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -12039,7 +12031,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -12055,7 +12047,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - uint64x1_t
@@ -12069,7 +12061,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -12092,7 +12084,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -12109,7 +12101,7 @@ intrinsics:
     attr:
       - FnCall: [target_feature, ['enable = "neon,aes"']]
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -12125,7 +12117,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, int8x8x4_t]
@@ -12147,7 +12139,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12173,7 +12165,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, 'int8x8_t', 'unsafe {{ transmute(b) }}']
@@ -12194,7 +12186,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8x2_t, 'int8x8_t']
@@ -12215,7 +12207,7 @@ intrinsics:
     return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12240,7 +12232,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8x3_t, 'int8x8_t', 'int8x16x2']
@@ -12265,7 +12257,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8x3_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
@@ -12292,7 +12284,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8x4_t, 'int8x8_t', 'int8x16x2']
@@ -12317,7 +12309,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8x4_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
@@ -12344,7 +12336,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, int8x16_t, uint8x8_t, vqtbx1]
@@ -12358,7 +12350,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8_t, "uint8x16_t", uint8x8_t, "vqtbx1", "_u8"]
@@ -12381,7 +12373,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, "int8x8_t", "transmute(c)", "i8x8::splat(8)", "int8x8"]
@@ -12411,7 +12403,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, 'int8x8x2_t']
@@ -12430,7 +12422,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12453,7 +12445,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'i8x8::splat(24)', 'int8x8']
@@ -12488,7 +12480,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'u8x8::splat(24)', 'uint8x8']
@@ -12525,7 +12517,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['int8x16_t', uint8x8_t, 'vqtbl1', 'int8x8_t']
@@ -12539,7 +12531,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['uint8x16_t', uint8x8_t, 'vqtbl1', 'uint8x8_t']
@@ -12561,7 +12553,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['int8x16x2_t', uint8x8_t, 'vqtbl2', 'int8x8_t']
@@ -12575,7 +12567,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12598,7 +12590,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, 'int8x16x2_t', uint8x8_t, 'vqtbx2']
@@ -12612,7 +12604,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12636,7 +12628,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12651,7 +12643,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12675,7 +12667,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, 'int8x16x3_t', uint8x8_t, 'vqtbx3']
@@ -12689,7 +12681,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12714,7 +12706,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12729,7 +12721,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12754,7 +12746,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, 'int8x16x4_t', uint8x8_t, 'vqtbx4']
@@ -12768,7 +12760,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12795,7 +12787,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["vqtbl1", "int8x16_t", "uint8x8_t", "int8x8_t"]
@@ -12814,7 +12806,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["vqtbl2", "int8x16_t", "uint8x8_t", "int8x8_t"]
@@ -12833,7 +12825,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12853,7 +12845,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     big_endian_inverse: false
     safety: safe
     types:
@@ -12873,7 +12865,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [vqtbx1, "int8x8_t", "int8x16_t", "uint8x8_t"]
@@ -12892,7 +12884,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [vqtbx2, "int8x8_t", "int8x16_t", "uint8x8_t"]
@@ -12911,7 +12903,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [vqtbx3, "int8x8_t", "int8x16_t", "uint8x8_t"]
@@ -12930,7 +12922,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [vqtbx4, "int8x8_t", "int8x16_t", "uint8x8_t"]
@@ -12949,7 +12941,7 @@ intrinsics:
     attr:
       - FnCall: [target_feature, ['enable = "{type[2]}"']]
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ldr]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -13016,7 +13008,7 @@ intrinsics:
       - FnCall: [target_feature, ['enable = "{type[2]}"']]
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [str]]}]]
       - FnCall: [allow, ['clippy::cast_ptr_alignment']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:

From 3d5f588f83cb8087eb5dd5a244fa2a3a26cfcfb4 Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Fri, 1 May 2026 12:26:25 +0100
Subject: [PATCH 05/78] gen-arm: skip `assert_instr` on big-endian

Some intrinsics optimise to different instructions on big endian which
leads to `assert_instr` failing
---
 .../core_arch/src/aarch64/neon/generated.rs   | 1323 ++++++++++++-----
 .../src/arm_shared/neon/generated.rs          |  240 ++-
 .../spec/neon/aarch64.spec.yml                |  238 +--
 .../spec/neon/arm_shared.spec.yml             |   30 +-
 4 files changed, 1297 insertions(+), 534 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 898ccfc9edeae..8e3493dfac128 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -65,7 +65,10 @@ pub fn __jcvt(a: f64) -> i32 {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(sabal2)
+)]
 pub fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
     unsafe {
         let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -80,7 +83,10 @@ pub fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(sabal2)
+)]
 pub fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
     unsafe {
         let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
@@ -95,7 +101,10 @@ pub fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(sabal2)
+)]
 pub fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
     unsafe {
         let d: int32x2_t = simd_shuffle!(b, b, [2, 3]);
@@ -110,7 +119,10 @@ pub fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uabal2)
+)]
 pub fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
     unsafe {
         let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -124,7 +136,10 @@ pub fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uabal2)
+)]
 pub fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
     unsafe {
         let d: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
@@ -138,7 +153,10 @@ pub fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uabal2)
+)]
 pub fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
     unsafe {
         let d: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
@@ -212,7 +230,7 @@ pub fn vabdh_f16(a: f16, b: f16) -> f16 {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sabdl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))]
 pub fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     unsafe {
         let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
@@ -226,7 +244,7 @@ pub fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sabdl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))]
 pub fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     unsafe {
         let c: int32x2_t = simd_shuffle!(a, a, [2, 3]);
@@ -240,7 +258,7 @@ pub fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sabdl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))]
 pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
     unsafe {
         let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -253,8 +271,8 @@ pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uabdl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
 pub fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
     unsafe {
         let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -266,8 +284,8 @@ pub fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uabdl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
 pub fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     unsafe {
         let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
@@ -279,8 +297,8 @@ pub fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uabdl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
 pub fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     unsafe {
         let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
@@ -4092,7 +4110,10 @@ pub fn vcmlaq_rot90_laneq_f32<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_f32<const LANE1: i32, const LANE2: i32>(
@@ -4113,7 +4134,10 @@ pub fn vcopy_lane_f32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
@@ -4137,7 +4161,10 @@ pub fn vcopy_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x8_
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_s16<const LANE1: i32, const LANE2: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
@@ -4157,7 +4184,10 @@ pub fn vcopy_lane_s16<const LANE1: i32, const LANE2: i32>(a: int16x4_t, b: int16
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_s32<const LANE1: i32, const LANE2: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
@@ -4175,7 +4205,10 @@ pub fn vcopy_lane_s32<const LANE1: i32, const LANE2: i32>(a: int32x2_t, b: int32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_u8<const LANE1: i32, const LANE2: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
@@ -4199,7 +4232,10 @@ pub fn vcopy_lane_u8<const LANE1: i32, const LANE2: i32>(a: uint8x8_t, b: uint8x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_u16<const LANE1: i32, const LANE2: i32>(
@@ -4222,7 +4258,10 @@ pub fn vcopy_lane_u16<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_u32<const LANE1: i32, const LANE2: i32>(
@@ -4243,7 +4282,10 @@ pub fn vcopy_lane_u32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_p8<const LANE1: i32, const LANE2: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
@@ -4267,7 +4309,10 @@ pub fn vcopy_lane_p8<const LANE1: i32, const LANE2: i32>(a: poly8x8_t, b: poly8x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(
@@ -4290,7 +4335,10 @@ pub fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_f32<const LANE1: i32, const LANE2: i32>(
@@ -4312,7 +4360,10 @@ pub fn vcopy_laneq_f32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x16_t) -> int8x8_t {
@@ -4338,7 +4389,10 @@ pub fn vcopy_laneq_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x1
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_s16<const LANE1: i32, const LANE2: i32>(
@@ -4362,7 +4416,10 @@ pub fn vcopy_laneq_s16<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_s32<const LANE1: i32, const LANE2: i32>(
@@ -4384,7 +4441,10 @@ pub fn vcopy_laneq_s32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_u8<const LANE1: i32, const LANE2: i32>(
@@ -4413,7 +4473,10 @@ pub fn vcopy_laneq_u8<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_u16<const LANE1: i32, const LANE2: i32>(
@@ -4437,7 +4500,10 @@ pub fn vcopy_laneq_u16<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_u32<const LANE1: i32, const LANE2: i32>(
@@ -4459,7 +4525,10 @@ pub fn vcopy_laneq_u32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_p8<const LANE1: i32, const LANE2: i32>(
@@ -4488,7 +4557,10 @@ pub fn vcopy_laneq_p8<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_p16<const LANE1: i32, const LANE2: i32>(
@@ -4536,7 +4608,10 @@ pub fn vcopyq_lane_f32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 1, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(
@@ -4558,7 +4633,10 @@ pub fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 1, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(
@@ -4580,7 +4658,10 @@ pub fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 1, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(
@@ -4602,7 +4683,10 @@ pub fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 1, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
@@ -4624,7 +4708,10 @@ pub fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x16_t, b: int8x8_t) -> int8x16_t {
@@ -4994,7 +5081,10 @@ pub fn vcopyq_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x16_t, b: int8x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_s16<const LANE1: i32, const LANE2: i32>(
@@ -5022,7 +5112,10 @@ pub fn vcopyq_lane_s16<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_s32<const LANE1: i32, const LANE2: i32>(
@@ -5046,7 +5139,10 @@ pub fn vcopyq_lane_s32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_u8<const LANE1: i32, const LANE2: i32>(
@@ -5419,7 +5515,10 @@ pub fn vcopyq_lane_u8<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_u16<const LANE1: i32, const LANE2: i32>(
@@ -5447,7 +5546,10 @@ pub fn vcopyq_lane_u16<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_u32<const LANE1: i32, const LANE2: i32>(
@@ -5471,7 +5573,10 @@ pub fn vcopyq_lane_u32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_p8<const LANE1: i32, const LANE2: i32>(
@@ -5844,7 +5949,10 @@ pub fn vcopyq_lane_p8<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(
@@ -5872,7 +5980,10 @@ pub fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_f32<const LANE1: i32, const LANE2: i32>(
@@ -5895,7 +6006,10 @@ pub fn vcopyq_laneq_f32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_f64<const LANE1: i32, const LANE2: i32>(
@@ -5916,7 +6030,10 @@ pub fn vcopyq_laneq_f64<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_s8<const LANE1: i32, const LANE2: i32>(
@@ -6287,7 +6404,10 @@ pub fn vcopyq_laneq_s8<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_s16<const LANE1: i32, const LANE2: i32>(
@@ -6314,7 +6434,10 @@ pub fn vcopyq_laneq_s16<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_s32<const LANE1: i32, const LANE2: i32>(
@@ -6337,7 +6460,10 @@ pub fn vcopyq_laneq_s32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_s64<const LANE1: i32, const LANE2: i32>(
@@ -6358,7 +6484,10 @@ pub fn vcopyq_laneq_s64<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_u8<const LANE1: i32, const LANE2: i32>(
@@ -6729,7 +6858,10 @@ pub fn vcopyq_laneq_u8<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_u16<const LANE1: i32, const LANE2: i32>(
@@ -6756,7 +6888,10 @@ pub fn vcopyq_laneq_u16<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_u32<const LANE1: i32, const LANE2: i32>(
@@ -6779,7 +6914,10 @@ pub fn vcopyq_laneq_u32<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_u64<const LANE1: i32, const LANE2: i32>(
@@ -6800,7 +6938,10 @@ pub fn vcopyq_laneq_u64<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_p8<const LANE1: i32, const LANE2: i32>(
@@ -7171,7 +7312,10 @@ pub fn vcopyq_laneq_p8<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_p16<const LANE1: i32, const LANE2: i32>(
@@ -7198,7 +7342,10 @@ pub fn vcopyq_laneq_p16<const LANE1: i32, const LANE2: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(
@@ -7228,7 +7375,7 @@ pub fn vcreate_f64(a: u64) -> float64x1_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtn))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t {
     unsafe { simd_cast(a) }
@@ -7282,7 +7429,7 @@ pub fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f16_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn2))]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_high_f16_f32(a: float16x4_t, b: float32x4_t) -> float16x8_t {
@@ -7292,7 +7439,7 @@ pub fn vcvt_high_f16_f32(a: float16x4_t, b: float32x4_t) -> float16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtl2))]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_high_f32_f16(a: float16x8_t) -> float32x4_t {
@@ -7302,7 +7449,7 @@ pub fn vcvt_high_f32_f16(a: float16x8_t) -> float32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, simd_cast(b), [0, 1, 2, 3]) }
@@ -7311,7 +7458,7 @@ pub fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f64_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t {
     unsafe {
@@ -9459,7 +9606,7 @@ pub fn vcvtd_u64_f64(a: f64) -> u64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_f32_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtxn))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtxn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
@@ -9475,7 +9622,7 @@ pub fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_high_f32_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtxn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtxn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, vcvtx_f32_f64(b), [0, 1, 2, 3]) }
@@ -10260,7 +10407,7 @@ pub fn vfma_lane_f64<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfma_laneq_f64<const LANE: i32>(
@@ -10327,7 +10474,7 @@ pub fn vfmah_f16(a: f16, b: f16, c: f16) -> f16 {
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_lane_f16)"]
 #[inline]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
@@ -10342,7 +10489,7 @@ pub fn vfmah_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_laneq_f16)"]
 #[inline]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
@@ -10391,7 +10538,7 @@ pub fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmas_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
@@ -10405,7 +10552,7 @@ pub fn vfmas_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmas_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
@@ -10419,7 +10566,7 @@ pub fn vfmas_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_laneq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmad_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
@@ -11015,7 +11162,7 @@ pub fn vfms_lane_f64<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfms_laneq_f64<const LANE: i32>(
@@ -11068,7 +11215,7 @@ pub fn vfmsh_f16(a: f16, b: f16, c: f16) -> f16 {
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_lane_f16)"]
 #[inline]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
@@ -11083,7 +11230,7 @@ pub fn vfmsh_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_laneq_f16)"]
 #[inline]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
@@ -11135,7 +11282,7 @@ pub fn vfmsq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmss_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
@@ -11145,7 +11292,7 @@ pub fn vfmss_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmss_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
@@ -11155,7 +11302,7 @@ pub fn vfmss_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_lane_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmsd_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
@@ -11165,7 +11312,7 @@ pub fn vfmsd_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_laneq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmsd_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
@@ -14210,7 +14357,7 @@ pub fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
@@ -14221,7 +14368,7 @@ pub fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_laneq_s16<const LANE: i32>(
@@ -14236,7 +14383,7 @@ pub fn vmlal_high_laneq_s16<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
@@ -14247,7 +14394,7 @@ pub fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_laneq_s32<const LANE: i32>(
@@ -14262,7 +14409,7 @@ pub fn vmlal_high_laneq_s32<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_u16<const LANE: i32>(
@@ -14277,7 +14424,7 @@ pub fn vmlal_high_lane_u16<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_laneq_u16<const LANE: i32>(
@@ -14292,7 +14439,7 @@ pub fn vmlal_high_laneq_u16<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_u32<const LANE: i32>(
@@ -14307,7 +14454,7 @@ pub fn vmlal_high_lane_u32<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_laneq_u32<const LANE: i32>(
@@ -14322,7 +14469,7 @@ pub fn vmlal_high_laneq_u32<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
     vmlal_high_s16(a, b, vdupq_n_s16(c))
@@ -14331,7 +14478,7 @@ pub fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
     vmlal_high_s32(a, b, vdupq_n_s32(c))
@@ -14340,7 +14487,7 @@ pub fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
     vmlal_high_u16(a, b, vdupq_n_u16(c))
@@ -14349,7 +14496,7 @@ pub fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
     vmlal_high_u32(a, b, vdupq_n_u32(c))
@@ -14358,7 +14505,7 @@ pub fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
     unsafe {
@@ -14371,7 +14518,7 @@ pub fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
     unsafe {
@@ -14384,7 +14531,7 @@ pub fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
     unsafe {
@@ -14397,7 +14544,7 @@ pub fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
     unsafe {
@@ -14410,7 +14557,7 @@ pub fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
     unsafe {
@@ -14423,7 +14570,7 @@ pub fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
     unsafe {
@@ -14454,7 +14601,7 @@ pub fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
@@ -14465,7 +14612,7 @@ pub fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_laneq_s16<const LANE: i32>(
@@ -14480,7 +14627,7 @@ pub fn vmlsl_high_laneq_s16<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
@@ -14491,7 +14638,7 @@ pub fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_laneq_s32<const LANE: i32>(
@@ -14506,7 +14653,7 @@ pub fn vmlsl_high_laneq_s32<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_u16<const LANE: i32>(
@@ -14521,7 +14668,7 @@ pub fn vmlsl_high_lane_u16<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_laneq_u16<const LANE: i32>(
@@ -14536,7 +14683,7 @@ pub fn vmlsl_high_laneq_u16<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_u32<const LANE: i32>(
@@ -14551,7 +14698,7 @@ pub fn vmlsl_high_lane_u32<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_laneq_u32<const LANE: i32>(
@@ -14566,7 +14713,7 @@ pub fn vmlsl_high_laneq_u32<const LANE: i32>(
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
     vmlsl_high_s16(a, b, vdupq_n_s16(c))
@@ -14575,7 +14722,7 @@ pub fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
     vmlsl_high_s32(a, b, vdupq_n_s32(c))
@@ -14584,7 +14731,7 @@ pub fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
     vmlsl_high_u16(a, b, vdupq_n_u16(c))
@@ -14593,7 +14740,7 @@ pub fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
     vmlsl_high_u32(a, b, vdupq_n_u32(c))
@@ -14602,7 +14749,7 @@ pub fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
     unsafe {
@@ -14615,7 +14762,7 @@ pub fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
     unsafe {
@@ -14628,7 +14775,7 @@ pub fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
     unsafe {
@@ -14641,7 +14788,7 @@ pub fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
     unsafe {
@@ -14654,7 +14801,7 @@ pub fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
     unsafe {
@@ -14667,7 +14814,7 @@ pub fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
     unsafe {
@@ -14681,7 +14828,7 @@ pub fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))]
 pub fn vmovl_high_s8(a: int8x16_t) -> int16x8_t {
     unsafe {
         let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -14693,7 +14840,7 @@ pub fn vmovl_high_s8(a: int8x16_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))]
 pub fn vmovl_high_s16(a: int16x8_t) -> int32x4_t {
     unsafe {
         let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
@@ -14705,7 +14852,7 @@ pub fn vmovl_high_s16(a: int16x8_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))]
 pub fn vmovl_high_s32(a: int32x4_t) -> int64x2_t {
     unsafe {
         let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
@@ -14717,7 +14864,7 @@ pub fn vmovl_high_s32(a: int32x4_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))]
 pub fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t {
     unsafe {
         let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -14729,7 +14876,7 @@ pub fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))]
 pub fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t {
     unsafe {
         let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
@@ -14741,7 +14888,7 @@ pub fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))]
 pub fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t {
     unsafe {
         let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
@@ -14753,7 +14900,7 @@ pub fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     unsafe {
         let c: int8x8_t = simd_cast(b);
@@ -14765,7 +14912,7 @@ pub fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     unsafe {
         let c: int16x4_t = simd_cast(b);
@@ -14777,7 +14924,7 @@ pub fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     unsafe {
         let c: int32x2_t = simd_cast(b);
@@ -14789,7 +14936,7 @@ pub fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
     unsafe {
         let c: uint8x8_t = simd_cast(b);
@@ -14801,7 +14948,7 @@ pub fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
     unsafe {
         let c: uint16x4_t = simd_cast(b);
@@ -14813,7 +14960,7 @@ pub fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
     unsafe {
         let c: uint32x2_t = simd_cast(b);
@@ -14960,7 +15107,7 @@ pub fn vmulh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
@@ -14971,7 +15118,7 @@ pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
@@ -14982,7 +15129,7 @@ pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int3
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
@@ -14993,7 +15140,7 @@ pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
@@ -15004,7 +15151,7 @@ pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int6
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t {
@@ -15015,7 +15162,7 @@ pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uin
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
@@ -15026,7 +15173,7 @@ pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> ui
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t {
@@ -15037,7 +15184,7 @@ pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uin
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
@@ -15048,7 +15195,7 @@ pub fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> ui
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
     vmull_high_s16(a, vdupq_n_s16(b))
@@ -15057,7 +15204,7 @@ pub fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
     vmull_high_s32(a, vdupq_n_s32(b))
@@ -15066,7 +15213,7 @@ pub fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t {
     vmull_high_u16(a, vdupq_n_u16(b))
@@ -15075,7 +15222,7 @@ pub fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t {
     vmull_high_u32(a, vdupq_n_u32(b))
@@ -15085,7 +15232,7 @@ pub fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(pmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(pmull2))]
 pub fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
     unsafe { vmull_p64(simd_extract!(a, 1), simd_extract!(b, 1)) }
 }
@@ -15094,7 +15241,7 @@ pub fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(pmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(pmull2))]
 pub fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
     unsafe {
         let a: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -15107,7 +15254,7 @@ pub fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 pub fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
     unsafe {
         let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -15120,7 +15267,7 @@ pub fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 pub fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     unsafe {
         let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
@@ -15133,7 +15280,7 @@ pub fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 pub fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     unsafe {
         let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
@@ -15146,7 +15293,7 @@ pub fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 pub fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
     unsafe {
         let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -15159,7 +15306,7 @@ pub fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 pub fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     unsafe {
         let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
@@ -15172,7 +15319,7 @@ pub fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 pub fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     unsafe {
         let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
@@ -16684,7 +16831,7 @@ pub fn vqaddd_u64(a: u64, b: u64) -> u64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
@@ -16695,7 +16842,7 @@ pub fn vqdmlal_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
@@ -16706,7 +16853,7 @@ pub fn vqdmlal_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
@@ -16717,7 +16864,7 @@ pub fn vqdmlal_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
@@ -16728,7 +16875,7 @@ pub fn vqdmlal_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
     vqaddq_s32(a, vqdmull_high_n_s16(b, c))
@@ -16737,7 +16884,7 @@ pub fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
     vqaddq_s32(a, vqdmull_high_s16(b, c))
@@ -16746,7 +16893,7 @@ pub fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
     vqaddq_s64(a, vqdmull_high_n_s32(b, c))
@@ -16755,7 +16902,7 @@ pub fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
     vqaddq_s64(a, vqdmull_high_s32(b, c))
@@ -16764,7 +16911,7 @@ pub fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, N = 2))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
@@ -16775,7 +16922,7 @@ pub fn vqdmlal_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t)
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
@@ -16850,7 +16997,7 @@ pub fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
@@ -16861,7 +17008,7 @@ pub fn vqdmlsl_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
@@ -16872,7 +17019,7 @@ pub fn vqdmlsl_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
@@ -16883,7 +17030,7 @@ pub fn vqdmlsl_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
@@ -16894,7 +17041,7 @@ pub fn vqdmlsl_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
     vqsubq_s32(a, vqdmull_high_n_s16(b, c))
@@ -16903,7 +17050,7 @@ pub fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
     vqsubq_s32(a, vqdmull_high_s16(b, c))
@@ -16912,7 +17059,7 @@ pub fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
     vqsubq_s64(a, vqdmull_high_n_s32(b, c))
@@ -16921,7 +17068,7 @@ pub fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
     vqsubq_s64(a, vqdmull_high_s32(b, c))
@@ -16930,7 +17077,7 @@ pub fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, N = 2))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
@@ -16941,7 +17088,7 @@ pub fn vqdmlsl_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t)
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
@@ -17138,7 +17285,7 @@ pub fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
@@ -17153,7 +17300,7 @@ pub fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
@@ -17168,7 +17315,7 @@ pub fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
@@ -17183,7 +17330,7 @@ pub fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 4))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 4))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
@@ -17198,7 +17345,7 @@ pub fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
     unsafe {
@@ -17211,7 +17358,7 @@ pub fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
     unsafe {
@@ -17224,7 +17371,7 @@ pub fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     unsafe {
@@ -17237,7 +17384,7 @@ pub fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     unsafe {
@@ -17361,7 +17508,7 @@ pub fn vqdmulls_s32(a: i32, b: i32) -> i64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     unsafe {
@@ -17376,7 +17523,7 @@ pub fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
@@ -17385,7 +17532,7 @@ pub fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, vqmovn_s64(b), [0, 1, 2, 3]) }
@@ -17394,7 +17541,7 @@ pub fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
     unsafe {
@@ -17409,7 +17556,7 @@ pub fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
@@ -17418,7 +17565,7 @@ pub fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, vqmovn_u64(b), [0, 1, 2, 3]) }
@@ -17495,7 +17642,7 @@ pub fn vqmovns_u32(a: u32) -> u16 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
     unsafe {
@@ -17510,7 +17657,7 @@ pub fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
@@ -17519,7 +17666,7 @@ pub fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, vqmovun_s64(b), [0, 1, 2, 3]) }
@@ -18281,7 +18428,7 @@ pub fn vqrshld_u64(a: u64, b: i64) -> u64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
@@ -18298,7 +18445,7 @@ pub fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
@@ -18309,7 +18456,7 @@ pub fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
@@ -18320,7 +18467,7 @@ pub fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
@@ -18337,7 +18484,7 @@ pub fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
@@ -18348,7 +18495,7 @@ pub fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
@@ -18431,7 +18578,7 @@ pub fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
@@ -18448,7 +18595,7 @@ pub fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
@@ -18459,7 +18606,7 @@ pub fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
@@ -18730,7 +18877,7 @@ pub fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
@@ -18747,7 +18894,7 @@ pub fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
@@ -18758,7 +18905,7 @@ pub fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
@@ -18769,7 +18916,7 @@ pub fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
@@ -18786,7 +18933,7 @@ pub fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
@@ -18797,7 +18944,7 @@ pub fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
@@ -18888,7 +19035,7 @@ pub fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
@@ -18905,7 +19052,7 @@ pub fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
@@ -18916,7 +19063,7 @@ pub fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
@@ -22507,7 +22654,7 @@ pub fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
@@ -22524,7 +22671,7 @@ pub fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
@@ -22535,7 +22682,7 @@ pub fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
@@ -22546,7 +22693,7 @@ pub fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
@@ -22563,7 +22710,7 @@ pub fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
@@ -22574,7 +22721,7 @@ pub fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
@@ -23087,7 +23234,7 @@ pub fn vshld_u64(a: u64, b: i64) -> u64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
@@ -23101,7 +23248,7 @@ pub fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
@@ -23115,7 +23262,7 @@ pub fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
@@ -23129,7 +23276,7 @@ pub fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
@@ -23143,7 +23290,7 @@ pub fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
@@ -23157,7 +23304,7 @@ pub fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
@@ -23171,7 +23318,7 @@ pub fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
@@ -23188,7 +23335,7 @@ pub fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
@@ -23199,7 +23346,7 @@ pub fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
@@ -23210,7 +23357,7 @@ pub fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
@@ -23227,7 +23374,7 @@ pub fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
@@ -23238,7 +23385,7 @@ pub fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
@@ -25610,7 +25757,7 @@ pub fn vsubh_f16(a: f16, b: f16) -> f16 {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
 pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
     unsafe {
         let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -25625,7 +25772,7 @@ pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
 pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     unsafe {
         let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
@@ -25640,7 +25787,7 @@ pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
 pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     unsafe {
         let c: int32x2_t = simd_shuffle!(a, a, [2, 3]);
@@ -25655,7 +25802,7 @@ pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
 pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
     unsafe {
         let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -25670,7 +25817,7 @@ pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
 pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     unsafe {
         let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
@@ -25685,7 +25832,7 @@ pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
 pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     unsafe {
         let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
@@ -25700,7 +25847,7 @@ pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
 pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
     unsafe {
         let c: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -25712,7 +25859,7 @@ pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
 pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
     unsafe {
         let c: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
@@ -25724,7 +25871,7 @@ pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
 pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
     unsafe {
         let c: int32x2_t = simd_shuffle!(b, b, [2, 3]);
@@ -25736,7 +25883,7 @@ pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
 pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
     unsafe {
         let c: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -25748,7 +25895,7 @@ pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
 pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
     unsafe {
         let c: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
@@ -25760,7 +25907,7 @@ pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
 pub fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
     unsafe {
         let c: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
@@ -26094,7 +26241,10 @@ pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -26104,7 +26254,10 @@ pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -26113,7 +26266,10 @@ pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26122,7 +26278,10 @@ pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26131,7 +26290,10 @@ pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26140,7 +26302,10 @@ pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26149,7 +26314,10 @@ pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26158,7 +26326,10 @@ pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26167,7 +26338,10 @@ pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26176,7 +26350,10 @@ pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -26185,7 +26362,10 @@ pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -26194,7 +26374,10 @@ pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -26209,7 +26392,10 @@ pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -26218,7 +26404,10 @@ pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -26227,7 +26416,10 @@ pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -26236,7 +26428,10 @@ pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -26245,7 +26440,10 @@ pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -26260,7 +26458,10 @@ pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -26269,7 +26470,10 @@ pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -26278,7 +26482,10 @@ pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -26287,7 +26494,10 @@ pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -26296,7 +26506,10 @@ pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -26311,7 +26524,10 @@ pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -26320,7 +26536,10 @@ pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -26330,7 +26549,10 @@ pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -26340,7 +26562,10 @@ pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -26349,7 +26574,10 @@ pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -26358,7 +26586,10 @@ pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -26367,7 +26598,10 @@ pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -26376,7 +26610,10 @@ pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -26385,7 +26622,10 @@ pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -26394,7 +26634,10 @@ pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -26403,7 +26646,10 @@ pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -26412,7 +26658,10 @@ pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -26421,7 +26670,10 @@ pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -26430,7 +26682,10 @@ pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -26445,7 +26700,10 @@ pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -26454,7 +26712,10 @@ pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -26463,7 +26724,10 @@ pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -26472,7 +26736,10 @@ pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -26481,7 +26748,10 @@ pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -26496,7 +26766,10 @@ pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -26505,7 +26778,10 @@ pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -26514,7 +26790,10 @@ pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -26523,7 +26802,10 @@ pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -26532,7 +26814,10 @@ pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -26547,7 +26832,10 @@ pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -26556,7 +26844,10 @@ pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -26840,7 +27131,10 @@ pub fn vuqadds_s32(a: i32, b: u32) -> i32 {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -26850,7 +27144,10 @@ pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -26859,7 +27156,10 @@ pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26868,7 +27168,10 @@ pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26877,7 +27180,10 @@ pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26886,7 +27192,10 @@ pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26895,7 +27204,10 @@ pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26904,7 +27216,10 @@ pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26913,7 +27228,10 @@ pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -26922,7 +27240,10 @@ pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -26931,7 +27252,10 @@ pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -26940,7 +27264,10 @@ pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -26955,7 +27282,10 @@ pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -26964,7 +27294,10 @@ pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -26973,7 +27306,10 @@ pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -26982,7 +27318,10 @@ pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -26991,7 +27330,10 @@ pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27006,7 +27348,10 @@ pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -27015,7 +27360,10 @@ pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -27024,7 +27372,10 @@ pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -27033,7 +27384,10 @@ pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -27042,7 +27396,10 @@ pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27057,7 +27414,10 @@ pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -27066,7 +27426,10 @@ pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -27076,7 +27439,10 @@ pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -27086,7 +27452,10 @@ pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -27095,7 +27464,10 @@ pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27104,7 +27476,10 @@ pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27113,7 +27488,10 @@ pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27122,7 +27500,10 @@ pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27131,7 +27512,10 @@ pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27140,7 +27524,10 @@ pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27149,7 +27536,10 @@ pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27158,7 +27548,10 @@ pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -27167,7 +27560,10 @@ pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -27176,7 +27572,10 @@ pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27191,7 +27590,10 @@ pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -27200,7 +27602,10 @@ pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -27209,7 +27614,10 @@ pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -27218,7 +27626,10 @@ pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -27227,7 +27638,10 @@ pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27242,7 +27656,10 @@ pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -27251,7 +27668,10 @@ pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -27260,7 +27680,10 @@ pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -27269,7 +27692,10 @@ pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -27278,7 +27704,10 @@ pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27293,7 +27722,10 @@ pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -27302,7 +27734,10 @@ pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -27330,7 +27765,10 @@ pub fn vxarq_u64<const IMM6: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -27340,7 +27778,10 @@ pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -27349,7 +27790,10 @@ pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27358,7 +27802,10 @@ pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -27367,7 +27814,10 @@ pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27376,7 +27826,10 @@ pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -27385,7 +27838,10 @@ pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27400,7 +27856,10 @@ pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -27409,7 +27868,10 @@ pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -27418,7 +27880,10 @@ pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27427,7 +27892,10 @@ pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -27436,7 +27904,10 @@ pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27445,7 +27916,10 @@ pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -27454,7 +27928,10 @@ pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27469,7 +27946,10 @@ pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -27478,7 +27958,10 @@ pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -27487,7 +27970,10 @@ pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27496,7 +27982,10 @@ pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -27505,7 +27994,10 @@ pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27514,7 +28006,10 @@ pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -27523,7 +28018,10 @@ pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27538,7 +28036,10 @@ pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -27547,7 +28048,10 @@ pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -27556,7 +28060,10 @@ pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27566,7 +28073,10 @@ pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -27576,7 +28086,10 @@ pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -27585,7 +28098,10 @@ pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27594,7 +28110,10 @@ pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -27603,7 +28122,10 @@ pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27612,7 +28134,10 @@ pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -27621,7 +28146,10 @@ pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27636,7 +28164,10 @@ pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -27645,7 +28176,10 @@ pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -27654,7 +28188,10 @@ pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27663,7 +28200,10 @@ pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -27672,7 +28212,10 @@ pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27681,7 +28224,10 @@ pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -27690,7 +28236,10 @@ pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27705,7 +28254,10 @@ pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -27714,7 +28266,10 @@ pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -27723,7 +28278,10 @@ pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27732,7 +28290,10 @@ pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -27741,7 +28302,10 @@ pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27750,7 +28314,10 @@ pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -27759,7 +28326,10 @@ pub fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27774,7 +28344,10 @@ pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -27783,7 +28356,10 @@ pub fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -27792,7 +28368,10 @@ pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index ce9b236ff551e..8b35cb5d33eb5 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -2189,7 +2189,11 @@ pub fn vaddh_f16(a: f16, b: f16) -> f16 {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2213,7 +2217,11 @@ pub fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2237,7 +2245,11 @@ pub fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2261,7 +2273,11 @@ pub fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2285,7 +2301,11 @@ pub fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2309,7 +2329,11 @@ pub fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2459,7 +2483,11 @@ pub fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddl2)
 )]
 #[cfg_attr(
@@ -2486,7 +2514,11 @@ pub fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddl2)
 )]
 #[cfg_attr(
@@ -2513,7 +2545,11 @@ pub fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddl2)
 )]
 #[cfg_attr(
@@ -2540,7 +2576,11 @@ pub fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddl2)
 )]
 #[cfg_attr(
@@ -2567,7 +2607,11 @@ pub fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddl2)
 )]
 #[cfg_attr(
@@ -2594,7 +2638,11 @@ pub fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddl2)
 )]
 #[cfg_attr(
@@ -2792,7 +2840,11 @@ pub fn vaddq_p128(a: p128, b: p128) -> p128 {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddw2)
 )]
 #[cfg_attr(
@@ -2817,7 +2869,11 @@ pub fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddw2)
 )]
 #[cfg_attr(
@@ -2842,7 +2898,11 @@ pub fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddw2)
 )]
 #[cfg_attr(
@@ -2867,7 +2927,11 @@ pub fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddw2)
 )]
 #[cfg_attr(
@@ -2892,7 +2956,11 @@ pub fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddw2)
 )]
 #[cfg_attr(
@@ -2917,7 +2985,11 @@ pub fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddw2)
 )]
 #[cfg_attr(
@@ -34881,7 +34953,11 @@ pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(sqdmlal, N = 2)
 )]
 #[rustc_legacy_const_generics(3)]
@@ -34904,7 +34980,11 @@ pub fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(sqdmlal, N = 1)
 )]
 #[rustc_legacy_const_generics(3)]
@@ -35011,7 +35091,11 @@ pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(sqdmlsl, N = 2)
 )]
 #[rustc_legacy_const_generics(3)]
@@ -35034,7 +35118,11 @@ pub fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(sqdmlsl, N = 1)
 )]
 #[rustc_legacy_const_generics(3)]
@@ -39004,7 +39092,11 @@ pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(raddhn2)
 )]
 #[cfg_attr(
@@ -39026,7 +39118,11 @@ pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(raddhn2)
 )]
 #[cfg_attr(
@@ -39048,7 +39144,11 @@ pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(raddhn2)
 )]
 #[cfg_attr(
@@ -39070,7 +39170,11 @@ pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(raddhn2)
 )]
 #[cfg_attr(
@@ -39094,7 +39198,11 @@ pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(raddhn2)
 )]
 #[cfg_attr(
@@ -39118,7 +39226,11 @@ pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(raddhn2)
 )]
 #[cfg_attr(
@@ -69685,7 +69797,11 @@ pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -69707,7 +69823,11 @@ pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -69729,7 +69849,11 @@ pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -69751,7 +69875,11 @@ pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -69773,7 +69901,11 @@ pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -69795,7 +69927,11 @@ pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -70225,7 +70361,11 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(sudot, LANE = 0)
 )]
 #[rustc_legacy_const_generics(3)]
@@ -70252,7 +70392,11 @@ pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t)
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(sudot, LANE = 0)
 )]
 #[rustc_legacy_const_generics(3)]
@@ -70280,7 +70424,11 @@ pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_
 #[target_feature(enable = "neon,i8mm")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(sudot, LANE = 3)
 )]
 #[rustc_legacy_const_generics(3)]
@@ -70300,7 +70448,11 @@ pub fn vsudot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x16_
 #[target_feature(enable = "neon,i8mm")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(sudot, LANE = 3)
 )]
 #[rustc_legacy_const_generics(3)]
@@ -72296,7 +72448,11 @@ pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x1
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(usdot)
 )]
 #[cfg_attr(
@@ -72325,7 +72481,11 @@ pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(usdot)
 )]
 #[cfg_attr(
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index e40775d68a70b..163145e7ba4d6 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -78,14 +78,22 @@ cfg-target-has-atomic-64: &cfg-target-has-atomic-64
 neon-unstable-fp8: &neon-unstable-fp8
   FnCall: [unstable, ['feature = "stdarch_neon_fp8"', 'issue = "none"']]
 
+# all(test, target_endian = "little")
+all-test-little-endian: &all-test-little-endian
+  FnCall: [all, [test, 'target_endian = "little"']]
+
 # #[cfg(target_endian = "little")]
-little-endian: &little-endian
+cfg-little-endian: &cfg-little-endian
   FnCall: [cfg, ['target_endian = "little"']]
 
 # #[cfg(target_endian = "big")]
-big-endian: &big-endian
+cfg-big-endian: &cfg-big-endian
   FnCall: [cfg, ['target_endian = "big"']]
 
+# all(test, not(target_env = "msvc"), target_endian = "big")
+cfg-test-not-msvc-little-endian: &cfg-test-not-msvc-little-endian
+  FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}, 'target_endian = "little"']]
+
 intrinsics:
   - name: "vaddd_{type}"
     doc: Add
@@ -206,8 +214,9 @@ intrinsics:
     doc: Signed Absolute difference Long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [sabdl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sabdl2]]}]]
     safety: safe
     types:
       - [int8x16_t, int16x8_t, int8x8_t, uint8x8_t]
@@ -245,8 +254,9 @@ intrinsics:
     doc: Signed Absolute difference Long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [sabdl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sabdl2]]}]]
     safety: safe
     types:
       - [int16x8_t, int32x4_t, int16x4_t, uint16x4_t]
@@ -284,8 +294,9 @@ intrinsics:
     doc: Signed Absolute difference Long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [sabdl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sabdl2]]}]]
     safety: safe
     types:
       - [int32x4_t, int64x2_t, int32x2_t, uint32x2_t]
@@ -1492,7 +1503,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtl2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -1513,7 +1524,7 @@ intrinsics:
     arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn2]]}]]
       - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
@@ -1530,7 +1541,7 @@ intrinsics:
     arguments: ["a: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtl2]]}]]
       - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
@@ -1547,7 +1558,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -1560,7 +1571,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -1577,7 +1588,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtxn]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -1612,7 +1623,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
     return_type: "{type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtxn2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -2687,7 +2698,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -2704,7 +2715,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -2727,7 +2738,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -2750,7 +2761,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -2766,8 +2777,9 @@ intrinsics:
     doc: Extract narrow
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
-    attr: [*neon-stable]
-    assert_instr: [xtn2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [xtn2]]}]]
     safety: safe
     types:
       - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
@@ -5145,8 +5157,9 @@ intrinsics:
     doc: Signed multiply long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[3]}"
-    attr: [*neon-stable]
-    assert_instr: [smull2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2]]}]]
     safety: safe
     types:
       - [int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int16x8_t]
@@ -5167,8 +5180,9 @@ intrinsics:
     doc: "Unsigned multiply long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[3]}"
-    attr: [*neon-stable]
-    assert_instr: [umull2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2]]}]]
     safety: safe
     types:
       - [uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint16x8_t]
@@ -5211,8 +5225,8 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [pmull2]]}]]
     safety: safe
-    assert_instr: [pmull2]
     types:
       - [poly8x16_t, poly8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', poly16x8_t]
     compose:
@@ -5233,8 +5247,8 @@ intrinsics:
     attr:
       - *neon-aes
       - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [pmull2]]}]]
     safety: safe
-    assert_instr: [pmull2]
     types:
       - [poly64x2_t, "p128"]
     compose:
@@ -5814,8 +5828,9 @@ intrinsics:
     doc: Signed Subtract Wide
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
-    attr: [*neon-stable]
-    assert_instr: [ssubw2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ssubw2]]}]]
     safety: safe
     types:
       - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -5835,8 +5850,9 @@ intrinsics:
     doc: Unsigned Subtract Wide
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
-    attr: [*neon-stable]
-    assert_instr: [usubw2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [usubw2]]}]]
     safety: safe
     types:
       - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -5856,8 +5872,9 @@ intrinsics:
     doc: "Signed Subtract Long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [ssubl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ssubl2]]}]]
     safety: safe
     types:
       - [int8x16_t, int16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t]
@@ -5886,8 +5903,9 @@ intrinsics:
     doc: "Unsigned Subtract Long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [usubl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [usubl2]]}]]
     safety: safe
     types:
       - [uint8x16_t, uint16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint8x8_t]
@@ -6894,8 +6912,9 @@ intrinsics:
     doc: Vector move
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [sxtl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sxtl2]]}]]
     safety: safe
     types:
       - [int8x16_t, int16x8_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -6912,8 +6931,9 @@ intrinsics:
     doc: Vector move
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [uxtl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uxtl2]]}]]
     safety: safe
     types:
       - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -7125,7 +7145,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -7141,7 +7161,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -7209,7 +7229,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -7228,7 +7248,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -7246,7 +7266,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -7298,7 +7318,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -7314,7 +7334,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -7380,7 +7400,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -7396,7 +7416,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -7412,7 +7432,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -7428,7 +7448,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -7494,7 +7514,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -7510,7 +7530,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -7578,7 +7598,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqxtn2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -7593,7 +7613,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqxtn2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -7685,7 +7705,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqxtun2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -7988,7 +8008,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -8025,7 +8045,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -8076,7 +8096,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -8270,7 +8290,7 @@ intrinsics:
     arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
     return_type: "{neon_type[3]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -8338,7 +8358,7 @@ intrinsics:
     arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
     return_type: "{neon_type[3]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -8383,7 +8403,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -8859,7 +8879,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -8884,7 +8904,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[3]}"
     attr:
-      - *little-endian
+      - *cfg-little-endian
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn2]]}]]
       - *neon-stable
     safety: safe
@@ -8907,7 +8927,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[3]}"
     attr:
-      - *big-endian
+      - *cfg-big-endian
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn]]}]]
       - *neon-stable
     safety: safe
@@ -8930,7 +8950,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
       - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
@@ -8955,7 +8975,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
       - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
@@ -8980,7 +9000,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
       - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
@@ -9009,7 +9029,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
       - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
@@ -9035,7 +9055,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
       - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
@@ -9155,7 +9175,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -9174,7 +9194,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -9193,7 +9213,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
@@ -9646,7 +9666,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]]
     safety: safe
     types:
       - [int8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]']
@@ -9676,7 +9696,7 @@ intrinsics:
       - *neon-fp16
       - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]]
     safety: safe
     types:
       - [float16x4_t, '[0, 4, 2, 6]']
@@ -9690,7 +9710,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
     types:
       - [int32x2_t, '[0, 2]']
@@ -9709,7 +9729,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]]
     safety: safe
     types:
       - [int8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]']
@@ -9738,7 +9758,7 @@ intrinsics:
       - *neon-fp16
       - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]]
     safety: safe
     types:
       - [float16x4_t, '[1, 5, 3, 7]']
@@ -9752,7 +9772,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
     types:
       - [int32x2_t, '[1, 3]']
@@ -9771,7 +9791,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
     types:
       - [int8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]']
@@ -9807,7 +9827,7 @@ intrinsics:
       - *neon-fp16
       - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
     types:
       - [float16x4_t, '[2, 6, 3, 7]']
@@ -9821,7 +9841,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
     types:
       - [int8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]']
@@ -9858,7 +9878,7 @@ intrinsics:
       - *neon-fp16
       - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
     types:
       - [float16x4_t, '[0, 4, 1, 5]']
@@ -9872,7 +9892,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
     types:
       - [int32x2_t, '[0, 2]']
@@ -9891,7 +9911,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]]
     safety: safe
     types:
       - [int8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]']
@@ -9920,7 +9940,7 @@ intrinsics:
       - *neon-fp16
       - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]]
     safety: safe
     types:
       - [float16x4_t, '[0, 2, 4, 6]']
@@ -9934,7 +9954,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
     types:
       - [int32x2_t, '[1, 3]']
@@ -9953,7 +9973,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]]
     safety: safe
     types:
       - [int8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]']
@@ -9986,7 +10006,7 @@ intrinsics:
       - *neon-fp16
       - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]]
     safety: safe
     types:
       - [float16x4_t, '[1, 3, 5, 7]']
@@ -10004,7 +10024,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uabal2]]}]]
     safety: safe
     types:
       - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -10031,7 +10051,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [sabal2]]}]]
     safety: safe
     types:
       - [int16x8_t, int8x16_t, int8x16_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t, uint8x8_t]
@@ -10360,7 +10380,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -10400,7 +10420,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -10453,7 +10473,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
@@ -10473,7 +10493,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
@@ -10571,7 +10591,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -10603,7 +10623,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -11325,7 +11345,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -11339,7 +11359,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -11353,7 +11373,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -11376,7 +11396,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -11455,8 +11475,8 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl2]]}]]
       - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uabdl2]]}]]
     safety: safe
     types:
       - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -11644,7 +11664,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -11661,7 +11681,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -11678,7 +11698,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -11700,7 +11720,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -11955,7 +11975,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -11973,7 +11993,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -11991,7 +12011,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -12010,7 +12030,7 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const LANE: i32']
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index bfc8033fdd8c9..5324fbb25198f 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -104,6 +104,10 @@ aarch64-crc-stable: &aarch64-crc-stable
 neon-unstable-f16: &neon-unstable-f16
   FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']]
 
+# all(target_endian = "little")
+all-neon-target-aarch64-arm64ec-little-endian: &all-neon-target-aarch64-arm64ec-little-endian
+  FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}, 'target_endian = "little"']]
+
 intrinsics:
   - name: "vand{neon_type.no}"
     doc: Vector bitwise and
@@ -6139,7 +6143,7 @@ intrinsics:
       - *neon-i8mm
       - *neon-v8
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot]]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [usdot]]}]]
       - *neon-unstable-i8mm
       - *neon-cfg-arm-unstable
     safety: safe
@@ -6192,7 +6196,7 @@ intrinsics:
       - *neon-i8mm
       - *neon-v8
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 0']]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sudot, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-unstable-i8mm
       - *neon-cfg-arm-unstable
@@ -6796,7 +6800,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vsubhn"]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [subhn2]]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [subhn2]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
@@ -6992,7 +6996,7 @@ intrinsics:
       - *neon-v8
       - *neon-i8mm
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 1']]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
     static_defs: ["const LANE: i32"]
@@ -7650,7 +7654,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal, N = 2]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
@@ -7669,7 +7673,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal, N = 1]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
@@ -7722,7 +7726,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl, N = 2]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
@@ -7741,7 +7745,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl, N = 1]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
@@ -13216,7 +13220,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [raddhn2]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
@@ -13243,7 +13247,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [raddhn2]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
@@ -14469,7 +14473,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] }  ]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['{type[4]}']]}] ]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
@@ -14525,7 +14529,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] }  ]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['{type[4]}']]}] ]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
@@ -14583,7 +14587,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vaddhn']] }  ]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['addhn2']]}] ]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['addhn2']]}] ]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe

From 1d556c44052ce8ef994da25e52784c6486ff2b70 Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Mon, 20 Apr 2026 14:21:22 +0100
Subject: [PATCH 06/78] gen-arm: port `vcopy_lane_{s,u,p,f}64` to generator

This enables the `big_endian_inverse` transformation to apply to this
intrinsic.
---
 .../core_arch/src/aarch64/neon/generated.rs   | 60 +++++++++++++++++++
 .../crates/core_arch/src/aarch64/neon/mod.rs  | 50 ----------------
 .../spec/neon/aarch64.spec.yml                | 20 +++++++
 3 files changed, 80 insertions(+), 50 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 8e3493dfac128..e0b853c9ef24d 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -4332,6 +4332,66 @@ pub fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_lane_f64<const LANE1: i32, const LANE2: i32>(
+    _a: float64x1_t,
+    b: float64x1_t,
+) -> float64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert!(LANE2 == 0);
+    b
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_lane_s64<const LANE1: i32, const LANE2: i32>(
+    _a: int64x1_t,
+    b: int64x1_t,
+) -> int64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert!(LANE2 == 0);
+    b
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_lane_u64<const LANE1: i32, const LANE2: i32>(
+    _a: uint64x1_t,
+    b: uint64x1_t,
+) -> uint64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert!(LANE2 == 0);
+    b
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_lane_p64<const LANE1: i32, const LANE2: i32>(
+    _a: poly64x1_t,
+    b: poly64x1_t,
+) -> poly64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert!(LANE2 == 0);
+    b
+}
+#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index cab36b9b4baf7..736baf5ec65bc 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -94,56 +94,6 @@ macro_rules! shift_right_and_insert {
 
 pub(crate) use shift_right_and_insert;
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_lane_s64<const N1: i32, const N2: i32>(_a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N1 == 0);
-    static_assert!(N2 == 0);
-    b
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_lane_u64<const N1: i32, const N2: i32>(_a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N1 == 0);
-    static_assert!(N2 == 0);
-    b
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_lane_p64<const N1: i32, const N2: i32>(_a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
-    static_assert!(N1 == 0);
-    static_assert!(N2 == 0);
-    b
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_lane_f64<const N1: i32, const N2: i32>(
-    _a: float64x1_t,
-    b: float64x1_t,
-) -> float64x1_t {
-    static_assert!(N1 == 0);
-    static_assert!(N2 == 0);
-    b
-}
 
 /// Duplicate vector element to vector or scalar
 #[inline]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 163145e7ba4d6..e68157bf541ec 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -8995,6 +8995,26 @@ intrinsics:
       - Identifier: ["{type[5]}", UnsafeSymbol]
       - Identifier: ["{type[6]}", UnsafeSymbol]
 
+  - name: "vcopy_lane_{neon_type[0]}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["_a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE1 = {type[1]}', 'LANE2 = {type[1]}']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - *neon-stable
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety: safe
+    types:
+      - [float64x1_t, '0', 'b']
+      - [poly64x1_t, '0', 'b']
+      - [uint64x1_t, '0', 'b']
+      - [int64x1_t, '0', 'b']
+    compose:
+      - FnCall: [static_assert!, ['LANE1 == {type[1]}']]
+      - FnCall: [static_assert!, ['LANE2 == {type[1]}']]
+      - Identifier: ["{type[2]}", Symbol]
+
   - name: "vcopy{neon_type[0].laneq_nox}"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]

From cf3ac4997ade61eec26e4a3d98c6e38cacf56deb Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Mon, 20 Apr 2026 14:21:22 +0100
Subject: [PATCH 07/78] gen-arm: port `vcopy_laneq_{f,u,s,p}64` to generator

This enables the `big_endian_inverse` transformation to apply to these
intrinsics.
---
 .../core_arch/src/aarch64/neon/generated.rs   | 60 ++++++++++++++++++
 .../crates/core_arch/src/aarch64/neon/mod.rs  | 61 -------------------
 .../spec/neon/aarch64.spec.yml                | 20 ++++++
 3 files changed, 80 insertions(+), 61 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index e0b853c9ef24d..335a79f0ee0fd 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -4641,6 +4641,66 @@ pub fn vcopy_laneq_p16<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
+    _a: float64x1_t,
+    b: float64x2_t,
+) -> float64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert_uimm_bits!(LANE2, 1);
+    unsafe { transmute::<f64, _>(simd_extract!(b, LANE2 as u32)) }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
+    _a: int64x1_t,
+    b: int64x2_t,
+) -> int64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert_uimm_bits!(LANE2, 1);
+    unsafe { transmute::<i64, _>(simd_extract!(b, LANE2 as u32)) }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
+    _a: uint64x1_t,
+    b: uint64x2_t,
+) -> uint64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert_uimm_bits!(LANE2, 1);
+    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
+    _a: poly64x1_t,
+    b: poly64x2_t,
+) -> poly64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert_uimm_bits!(LANE2, 1);
+    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
+}
+#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index 736baf5ec65bc..d232c263f5015 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -94,67 +94,6 @@ macro_rules! shift_right_and_insert {
 
 pub(crate) use shift_right_and_insert;
 
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
-    _a: int64x1_t,
-    b: int64x2_t,
-) -> int64x1_t {
-    static_assert!(LANE1 == 0);
-    static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<i64, _>(simd_extract!(b, LANE2 as u32)) }
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
-    _a: uint64x1_t,
-    b: uint64x2_t,
-) -> uint64x1_t {
-    static_assert!(LANE1 == 0);
-    static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
-    _a: poly64x1_t,
-    b: poly64x2_t,
-) -> poly64x1_t {
-    static_assert!(LANE1 == 0);
-    static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
-    _a: float64x1_t,
-    b: float64x2_t,
-) -> float64x1_t {
-    static_assert!(LANE1 == 0);
-    static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<f64, _>(simd_extract!(b, LANE2 as u32)) }
-}
-
 /// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index e68157bf541ec..dd625ebb22ba9 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -9015,6 +9015,26 @@ intrinsics:
       - FnCall: [static_assert!, ['LANE2 == {type[1]}']]
       - Identifier: ["{type[2]}", Symbol]
 
+  - name: "vcopy_laneq_{neon_type[0]}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["_a: {neon_type[1]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE1 = {type[2]}', 'LANE2 = {type[3]}']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - *neon-stable
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety: safe
+    types:
+      - [float64x2_t, float64x1_t, '0', '1', 'transmute::<f64, _>(simd_extract!(b, LANE2 as u32))']
+      - [poly64x2_t, poly64x1_t, '0', '1', 'transmute::<u64, _>(simd_extract!(b, LANE2 as u32))']
+      - [uint64x2_t, uint64x1_t, '0', '1', 'transmute::<u64, _>(simd_extract!(b, LANE2 as u32))']
+      - [int64x2_t, int64x1_t, '0', '1', 'transmute::<i64, _>(simd_extract!(b, LANE2 as u32))']
+    compose:
+      - FnCall: [static_assert!, ['LANE1 == {type[2]}']]
+      - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[3]}']]
+      - Identifier: ["{type[4]}", UnsafeSymbol]
+
   - name: "vcopy{neon_type[0].laneq_nox}"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]

From 54b56a3d9528960f7bbabadb238ba309328a92f5 Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Mon, 20 Apr 2026 14:21:22 +0100
Subject: [PATCH 08/78] gen-arm: port `vget_{high,low}_{p,f}64` to generator

This enables the `big_endian_inverse` transform to apply to these
intrinsics.
---
 .../core_arch/src/aarch64/neon/generated.rs   | 18 +++++++++
 .../crates/core_arch/src/aarch64/neon/mod.rs  | 36 ------------------
 .../src/arm_shared/neon/generated.rs          | 38 +++++++++++++++++++
 .../spec/neon/aarch64.spec.yml                | 14 +++++++
 .../spec/neon/arm_shared.spec.yml             | 10 +++--
 5 files changed, 76 insertions(+), 40 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 335a79f0ee0fd..4baad8fc373b4 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -11438,6 +11438,24 @@ pub fn vfmsd_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
 pub fn vfmsd_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
     vfmad_laneq_f64::<LANE>(a, -b, c)
 }
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmov))]
+pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
+    unsafe { float64x1_t([simd_extract!(a, 1)]) }
+}
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
+    unsafe { float64x1_t([simd_extract!(a, 0)]) }
+}
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"]
 #[doc = "## Safety"]
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index d232c263f5015..07f5a61d7737a 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -332,42 +332,6 @@ pub fn vmovq_n_f64(value: f64) -> float64x2_t {
     vdupq_n_f64(value)
 }
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
-    unsafe { float64x1_t([simd_extract!(a, 1)]) }
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ext))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
-    unsafe { transmute(u64x1::new(simd_extract!(a, 1))) }
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
-    unsafe { float64x1_t([simd_extract!(a, 0)]) }
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
-    unsafe { transmute(u64x1::new(simd_extract!(a, 0))) }
-}
-
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 8b35cb5d33eb5..f24a12ad40c95 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -13400,6 +13400,27 @@ pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) }
 }
 #[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
+    unsafe { transmute(u64x1::new(simd_extract!(a, 1))) }
+}
+#[doc = "Duplicate vector element to vector or scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -14087,6 +14108,23 @@ pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 #[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
+    unsafe { transmute(u64x1::new(simd_extract!(a, 0))) }
+}
+#[doc = "Duplicate vector element to vector or scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index dd625ebb22ba9..e38b4a508dda0 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -14404,3 +14404,17 @@ intrinsics:
           links:
             - link: "llvm.aarch64.fjcvtzs"
               arch: aarch64,arm64ec
+
+  - name: "{type[0]}"
+    doc: "Duplicate vector element to vector or scalar"
+    arguments: ["a: {type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - *neon-stable
+    assert_instr: ['{type[3]}']
+    safety: safe
+    types:
+      - ['vget_high_f64', 'float64x2_t', 'float64x1_t', 'fmov', 'float64x1_t([simd_extract!(a, 1)])']
+      - ['vget_low_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 0)])']
+    compose:
+      - Identifier: ['{type[4]}', UnsafeSymbol]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 5324fbb25198f..68c92aa54dc6a 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -15001,8 +15001,9 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - ['vget_high_s64', 'int64x2_t', 'int64x1_t', 'vmov', 'ext', 'int64x1_t([simd_extract!(a, 1)])']
-      - ['vget_high_u64', 'uint64x2_t', 'uint64x1_t', 'vmov', 'ext', 'uint64x1_t([simd_extract!(a, 1)])']
+      - ['vget_high_s64', 'int64x2_t',   'int64x1_t',   'vmov', 'ext',  'int64x1_t([simd_extract!(a, 1)])']
+      - ['vget_high_u64', 'uint64x2_t',  'uint64x1_t',  'vmov', 'ext',  'uint64x1_t([simd_extract!(a, 1)])']
+      - ['vget_high_p64', 'poly64x2_t',  'poly64x1_t',  'vmov', 'ext',  'transmute(u64x1::new(simd_extract!(a, 1)))']
     compose:
       - Identifier: ['{type[5]}', UnsafeSymbol]
 
@@ -15017,8 +15018,9 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - ['vget_low_s64', 'int64x2_t', 'int64x1_t', 'int64x1_t([simd_extract!(a, 0)])']
-      - ['vget_low_u64', 'uint64x2_t', 'uint64x1_t', 'uint64x1_t([simd_extract!(a, 0)])']
+      - ['vget_low_s64', 'int64x2_t',   'int64x1_t',   'int64x1_t([simd_extract!(a, 0)])']
+      - ['vget_low_u64', 'uint64x2_t',  'uint64x1_t',  'uint64x1_t([simd_extract!(a, 0)])']
+      - ['vget_low_p64', 'poly64x2_t',  'poly64x1_t',  'transmute(u64x1::new(simd_extract!(a, 0)))']
     compose:
       - Identifier: ['{type[3]}', UnsafeSymbol]
 

From fe3e89d2d5372bae5e063f989506497bf57f96cb Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Mon, 20 Apr 2026 14:21:22 +0100
Subject: [PATCH 09/78] gen-arm: port `vgetq_lane_f64` to generator

This enables the `big_endian_inverse` transform to apply to this
intrinsic.
---
 .../core_arch/src/aarch64/neon/generated.rs      | 11 +++++++++++
 .../crates/core_arch/src/aarch64/neon/mod.rs     | 14 --------------
 .../stdarch-gen-arm/spec/neon/aarch64.spec.yml   | 16 ++++++++++++++++
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 4baad8fc373b4..c1054f8ff0103 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -11456,6 +11456,17 @@ pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
 pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
     unsafe { float64x1_t([simd_extract!(a, 0)]) }
 }
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+pub fn vgetq_lane_f64<const IMM5: i32>(a: float64x2_t) -> f64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe { simd_extract!(a, IMM5 as u32) }
+}
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"]
 #[doc = "## Safety"]
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index 07f5a61d7737a..bc82c980a802d 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -346,20 +346,6 @@ pub fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
     unsafe { simd_extract!(v, IMM5 as u32) }
 }
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, IMM5 = 0)
-)]
-pub fn vgetq_lane_f64<const IMM5: i32>(v: float64x2_t) -> f64 {
-    static_assert_uimm_bits!(IMM5, 1);
-    unsafe { simd_extract!(v, IMM5 as u32) }
-}
-
 /// Vector combine
 #[inline]
 #[target_feature(enable = "neon")]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index e38b4a508dda0..48ce5a5f86676 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -14418,3 +14418,19 @@ intrinsics:
       - ['vget_low_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 0)])']
     compose:
       - Identifier: ['{type[4]}', UnsafeSymbol]
+
+  - name: "vgetq_lane_{type[1]}"
+    doc: "Duplicate vector element to vector or scalar"
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+    assert_instr: [['nop', 'IMM5 = 0']]
+    safety: safe
+    static_defs: ["const IMM5: i32"]
+    types:
+      - ['float64x2_t', 'f64']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [IMM5, "1"]]
+      - FnCall: ['simd_extract!', [a, 'IMM5 as u32'], [] , true]

From 4661e734dcbd73f72050b5dc5b3743b2e01248df Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Mon, 20 Apr 2026 14:21:22 +0100
Subject: [PATCH 10/78] gen-arm: port `vcombine_f64` to generator

This enables the `big_endian_inverse` transform to apply to this
intrinsic.
---
 .../crates/core_arch/src/aarch64/neon/generated.rs  |  9 +++++++++
 .../crates/core_arch/src/aarch64/neon/mod.rs        |  9 ---------
 .../stdarch-gen-arm/spec/neon/aarch64.spec.yml      | 13 +++++++++++++
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index c1054f8ff0103..3c69f9defb641 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -4106,6 +4106,15 @@ pub fn vcmlaq_rot90_laneq_f32<const LANE: i32>(
         vcmlaq_rot90_f32(a, b, c)
     }
 }
+#[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn vcombine_f64(a: float64x1_t, b: float64x1_t) -> float64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 1]) }
+}
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f32)"]
 #[inline]
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index bc82c980a802d..386176874d035 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -346,15 +346,6 @@ pub fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
     unsafe { simd_extract!(v, IMM5 as u32) }
 }
 
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t {
-    unsafe { simd_shuffle!(low, high, [0, 1]) }
-}
-
 /// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 48ce5a5f86676..0f9c8f2a5c552 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -14419,6 +14419,19 @@ intrinsics:
     compose:
       - Identifier: ['{type[4]}', UnsafeSymbol]
 
+  - name: "vcombine{neon_type[0].noq}"
+    doc: Join two smaller vectors into a single larger vector
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [mov]
+    safety: safe
+    types:
+      - [float64x1_t, float64x2_t, '[0, 1]']
+    compose:
+      - FnCall: [simd_shuffle!, [a, b, '{type[2]}']]
+
   - name: "vgetq_lane_{type[1]}"
     doc: "Duplicate vector element to vector or scalar"
     arguments: ["a: {type[0]}"]

From 1c2d423713d788cf473c4467db77176cfcea45d5 Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Fri, 1 May 2026 11:08:00 +0100
Subject: [PATCH 11/78] core_arch: remove redundant tests

Manually written tests for `vcopy_laneq`, `vget{,q}_{high,low,lane}`,
`vcombine` and `vaddw_high` don't account for the expected differences
in output for big endian intrinsics and given that they duplicate the
testing of `intrinsic-test`, can be removed.
---
 .../crates/core_arch/src/aarch64/neon/mod.rs  |  77 ---
 .../core_arch/src/arm_shared/neon/mod.rs      | 612 ------------------
 .../crates/stdarch-verify/tests/arm.rs        | 124 ++++
 3 files changed, 124 insertions(+), 689 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index 386176874d035..b140131012bbf 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -611,38 +611,6 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_f64() {
-        let a = f64x2::new(1.0, 2.0);
-        let e = f64x1::new(2.0);
-        let r = f64x1::from(vget_high_f64(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_p64() {
-        let a = u64x2::new(1, 2);
-        let e = u64x1::new(2);
-        let r = u64x1::from(vget_high_p64(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_f64() {
-        let a = f64x2::new(1.0, 2.0);
-        let e = f64x1::new(1.0);
-        let r = f64x1::from(vget_low_f64(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_p64() {
-        let a = u64x2::new(1, 2);
-        let e = u64x1::new(1);
-        let r = u64x1::from(vget_low_p64(a.into()));
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     fn test_vget_lane_f64() {
         let v = f64x1::new(1.0);
@@ -650,15 +618,6 @@ mod tests {
         assert_eq!(r, 1.0);
     }
 
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_f64() {
-        let v = f64x2::new(0.0, 1.0);
-        let r = vgetq_lane_f64::<1>(v.into());
-        assert_eq!(r, 1.0);
-        let r = vgetq_lane_f64::<0>(v.into());
-        assert_eq!(r, 0.0);
-    }
-
     #[simd_test(enable = "neon")]
     fn test_vcopy_lane_s64() {
         let a = i64x1::new(1);
@@ -695,42 +654,6 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    fn test_vcopy_laneq_s64() {
-        let a = i64x1::new(1);
-        let b = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let r = i64x1::from(vcopy_laneq_s64::<0, 1>(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vcopy_laneq_u64() {
-        let a = u64x1::new(1);
-        let b = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r = u64x1::from(vcopy_laneq_u64::<0, 1>(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vcopy_laneq_p64() {
-        let a = u64x1::new(1);
-        let b = u64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let r = u64x1::from(vcopy_laneq_p64::<0, 1>(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vcopy_laneq_f64() {
-        let a = f64x1::from_array([1.]);
-        let b = f64x2::from_array([0., 0.5]);
-        let e = f64x1::from_array([0.5]);
-        let r = f64x1::from(vcopy_laneq_f64::<0, 1>(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     fn test_vbsl_f64() {
         let a = u64x1::new(0x8000000000000000);
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
index ed65de2b89460..c4249c2b3c768 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
@@ -1663,235 +1663,6 @@ mod tests {
         assert_eq!(r, e)
     }
 
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_u8() {
-        let v = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = vget_lane_u8::<1>(v.into());
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_u32() {
-        let v = u32x4::new(1, 2, 3, 4);
-        let r = vgetq_lane_u32::<1>(v.into());
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_s32() {
-        let v = i32x4::new(1, 2, 3, 4);
-        let r = vgetq_lane_s32::<1>(v.into());
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_u64() {
-        let v = u64x1::new(1);
-        let r = vget_lane_u64::<0>(v.into());
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_u16() {
-        let v = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = vgetq_lane_u16::<1>(v.into());
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_s8() {
-        let v = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vget_lane_s8::<2>(v.into());
-        assert_eq!(r, 2);
-        let r = vget_lane_s8::<4>(v.into());
-        assert_eq!(r, 4);
-        let r = vget_lane_s8::<5>(v.into());
-        assert_eq!(r, 5);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_p8() {
-        let v = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vget_lane_p8::<2>(v.into());
-        assert_eq!(r, 2);
-        let r = vget_lane_p8::<3>(v.into());
-        assert_eq!(r, 3);
-        let r = vget_lane_p8::<5>(v.into());
-        assert_eq!(r, 5);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_p16() {
-        let v = u16x4::new(0, 1, 2, 3);
-        let r = vget_lane_p16::<2>(v.into());
-        assert_eq!(r, 2);
-        let r = vget_lane_p16::<3>(v.into());
-        assert_eq!(r, 3);
-        let r = vget_lane_p16::<0>(v.into());
-        assert_eq!(r, 0);
-        let r = vget_lane_p16::<1>(v.into());
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_s16() {
-        let v = i16x4::new(0, 1, 2, 3);
-        let r = vget_lane_s16::<2>(v.into());
-        assert_eq!(r, 2);
-        let r = vget_lane_s16::<3>(v.into());
-        assert_eq!(r, 3);
-        let r = vget_lane_s16::<0>(v.into());
-        assert_eq!(r, 0);
-        let r = vget_lane_s16::<1>(v.into());
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_u16() {
-        let v = u16x4::new(0, 1, 2, 3);
-        let r = vget_lane_u16::<2>(v.into());
-        assert_eq!(r, 2);
-        let r = vget_lane_u16::<3>(v.into());
-        assert_eq!(r, 3);
-        let r = vget_lane_u16::<0>(v.into());
-        assert_eq!(r, 0);
-        let r = vget_lane_u16::<1>(v.into());
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_f32() {
-        let v = f32x2::new(0.0, 1.0);
-        let r = vget_lane_f32::<1>(v.into());
-        assert_eq!(r, 1.0);
-        let r = vget_lane_f32::<0>(v.into());
-        assert_eq!(r, 0.0);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_s32() {
-        let v = i32x2::new(0, 1);
-        let r = vget_lane_s32::<1>(v.into());
-        assert_eq!(r, 1);
-        let r = vget_lane_s32::<0>(v.into());
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_u32() {
-        let v = u32x2::new(0, 1);
-        let r = vget_lane_u32::<1>(v.into());
-        assert_eq!(r, 1);
-        let r = vget_lane_u32::<0>(v.into());
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_s64() {
-        let v = i64x1::new(1);
-        let r = vget_lane_s64::<0>(v.into());
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_lane_p64() {
-        let v = u64x1::new(1);
-        let r = vget_lane_p64::<0>(v.into());
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_s8() {
-        let v = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r = vgetq_lane_s8::<7>(v.into());
-        assert_eq!(r, 7);
-        let r = vgetq_lane_s8::<13>(v.into());
-        assert_eq!(r, 13);
-        let r = vgetq_lane_s8::<3>(v.into());
-        assert_eq!(r, 3);
-        let r = vgetq_lane_s8::<0>(v.into());
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_p8() {
-        let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r = vgetq_lane_p8::<7>(v.into());
-        assert_eq!(r, 7);
-        let r = vgetq_lane_p8::<13>(v.into());
-        assert_eq!(r, 13);
-        let r = vgetq_lane_p8::<3>(v.into());
-        assert_eq!(r, 3);
-        let r = vgetq_lane_p8::<0>(v.into());
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_u8() {
-        let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r = vgetq_lane_u8::<7>(v.into());
-        assert_eq!(r, 7);
-        let r = vgetq_lane_u8::<13>(v.into());
-        assert_eq!(r, 13);
-        let r = vgetq_lane_u8::<3>(v.into());
-        assert_eq!(r, 3);
-        let r = vgetq_lane_u8::<0>(v.into());
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_s16() {
-        let v = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vgetq_lane_s16::<3>(v.into());
-        assert_eq!(r, 3);
-        let r = vgetq_lane_s16::<6>(v.into());
-        assert_eq!(r, 6);
-        let r = vgetq_lane_s16::<0>(v.into());
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_p16() {
-        let v = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vgetq_lane_p16::<3>(v.into());
-        assert_eq!(r, 3);
-        let r = vgetq_lane_p16::<7>(v.into());
-        assert_eq!(r, 7);
-        let r = vgetq_lane_p16::<1>(v.into());
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_f32() {
-        let v = f32x4::new(0.0, 1.0, 2.0, 3.0);
-        let r = vgetq_lane_f32::<3>(v.into());
-        assert_eq!(r, 3.0);
-        let r = vgetq_lane_f32::<0>(v.into());
-        assert_eq!(r, 0.0);
-        let r = vgetq_lane_f32::<2>(v.into());
-        assert_eq!(r, 2.0);
-        let r = vgetq_lane_f32::<1>(v.into());
-        assert_eq!(r, 1.0);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_s64() {
-        let v = i64x2::new(0, 1);
-        let r = vgetq_lane_s64::<1>(v.into());
-        assert_eq!(r, 1);
-        let r = vgetq_lane_s64::<0>(v.into());
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_p64() {
-        let v = u64x2::new(0, 1);
-        let r = vgetq_lane_p64::<1>(v.into());
-        assert_eq!(r, 1);
-        let r = vgetq_lane_p64::<0>(v.into());
-        assert_eq!(r, 0);
-    }
-
     #[simd_test(enable = "neon")]
     fn test_vext_s64() {
         let a: i64x1 = i64x1::new(0);
@@ -1910,182 +1681,6 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_s8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = i8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let r = i8x8::from(vget_high_s8(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_s16() {
-        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = i16x4::new(5, 6, 7, 8);
-        let r = i16x4::from(vget_high_s16(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_s32() {
-        let a = i32x4::new(1, 2, 3, 4);
-        let e = i32x2::new(3, 4);
-        let r = i32x2::from(vget_high_s32(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_s64() {
-        let a = i64x2::new(1, 2);
-        let e = i64x1::new(2);
-        let r = i64x1::from(vget_high_s64(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_u8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let r = u8x8::from(vget_high_u8(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_u16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(5, 6, 7, 8);
-        let r = u16x4::from(vget_high_u16(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_u32() {
-        let a = u32x4::new(1, 2, 3, 4);
-        let e = u32x2::new(3, 4);
-        let r = u32x2::from(vget_high_u32(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_u64() {
-        let a = u64x2::new(1, 2);
-        let e = u64x1::new(2);
-        let r = u64x1::from(vget_high_u64(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_p8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let r = u8x8::from(vget_high_p8(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_p16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(5, 6, 7, 8);
-        let r = u16x4::from(vget_high_p16(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_high_f32() {
-        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
-        let e = f32x2::new(3.0, 4.0);
-        let r = f32x2::from(vget_high_f32(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_s8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = i8x8::from(vget_low_s8(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_s16() {
-        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = i16x4::new(1, 2, 3, 4);
-        let r = i16x4::from(vget_low_s16(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_s32() {
-        let a = i32x4::new(1, 2, 3, 4);
-        let e = i32x2::new(1, 2);
-        let r = i32x2::from(vget_low_s32(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_s64() {
-        let a = i64x2::new(1, 2);
-        let e = i64x1::new(1);
-        let r = i64x1::from(vget_low_s64(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_u8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = u8x8::from(vget_low_u8(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_u16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(1, 2, 3, 4);
-        let r = u16x4::from(vget_low_u16(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_u32() {
-        let a = u32x4::new(1, 2, 3, 4);
-        let e = u32x2::new(1, 2);
-        let r = u32x2::from(vget_low_u32(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_u64() {
-        let a = u64x2::new(1, 2);
-        let e = u64x1::new(1);
-        let r = u64x1::from(vget_low_u64(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_p8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = u8x8::from(vget_low_p8(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_p16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(1, 2, 3, 4);
-        let r = u16x4::from(vget_low_p16(a.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vget_low_f32() {
-        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
-        let e = f32x2::new(1.0, 2.0);
-        let r = f32x2::from(vget_low_f32(a.into()));
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     fn test_vdupq_n_s8() {
         let v: i8 = 42;
@@ -2469,13 +2064,6 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    fn test_vgetq_lane_u64() {
-        let v = u64x2::new(1, 2);
-        let r = vgetq_lane_u64::<1>(v.into());
-        assert_eq!(r, 2);
-    }
-
     #[simd_test(enable = "neon")]
     fn test_vadd_s8() {
         test_ari_s8(
@@ -2638,72 +2226,6 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    fn test_vaddl_high_s8() {
-        let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let x = i8::MAX;
-        let b = i8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-        let x = x as i16;
-        let e = i16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15);
-        let r = i16x8::from(vaddl_high_s8(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vaddl_high_s16() {
-        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let x = i16::MAX;
-        let b = i16x8::new(x, x, x, x, x, x, x, x);
-        let x = x as i32;
-        let e = i32x4::new(x + 4, x + 5, x + 6, x + 7);
-        let r = i32x4::from(vaddl_high_s16(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vaddl_high_s32() {
-        let a = i32x4::new(0, 1, 2, 3);
-        let x = i32::MAX;
-        let b = i32x4::new(x, x, x, x);
-        let x = x as i64;
-        let e = i64x2::new(x + 2, x + 3);
-        let r = i64x2::from(vaddl_high_s32(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vaddl_high_u8() {
-        let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let x = u8::MAX;
-        let b = u8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-        let x = x as u16;
-        let e = u16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15);
-        let r = u16x8::from(vaddl_high_u8(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vaddl_high_u16() {
-        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let x = u16::MAX;
-        let b = u16x8::new(x, x, x, x, x, x, x, x);
-        let x = x as u32;
-        let e = u32x4::new(x + 4, x + 5, x + 6, x + 7);
-        let r = u32x4::from(vaddl_high_u16(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vaddl_high_u32() {
-        let a = u32x4::new(0, 1, 2, 3);
-        let x = u32::MAX;
-        let b = u32x4::new(x, x, x, x);
-        let x = x as u64;
-        let e = u64x2::new(x + 2, x + 3);
-        let r = u64x2::from(vaddl_high_u32(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     fn test_vaddw_s8() {
         let x = i16::MAX;
@@ -2794,96 +2316,6 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    fn test_vaddw_high_s8() {
-        let x = i16::MAX;
-        let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
-        let y = i8::MAX;
-        let b = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y);
-        let y = y as i16;
-        let e = i16x8::new(
-            x.wrapping_add(y),
-            1 + y,
-            2 + y,
-            3 + y,
-            4 + y,
-            5 + y,
-            6 + y,
-            7 + y,
-        );
-        let r = i16x8::from(vaddw_high_s8(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vaddw_high_s16() {
-        let x = i32::MAX;
-        let a = i32x4::new(x, 1, 2, 3);
-        let y = i16::MAX;
-        let b = i16x8::new(0, 0, 0, 0, y, y, y, y);
-        let y = y as i32;
-        let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
-        let r = i32x4::from(vaddw_high_s16(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vaddw_high_s32() {
-        let x = i64::MAX;
-        let a = i64x2::new(x, 1);
-        let y = i32::MAX;
-        let b = i32x4::new(0, 0, y, y);
-        let y = y as i64;
-        let e = i64x2::new(x.wrapping_add(y), 1 + y);
-        let r = i64x2::from(vaddw_high_s32(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vaddw_high_u8() {
-        let x = u16::MAX;
-        let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
-        let y = u8::MAX;
-        let b = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y);
-        let y = y as u16;
-        let e = u16x8::new(
-            x.wrapping_add(y),
-            1 + y,
-            2 + y,
-            3 + y,
-            4 + y,
-            5 + y,
-            6 + y,
-            7 + y,
-        );
-        let r = u16x8::from(vaddw_high_u8(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vaddw_high_u16() {
-        let x = u32::MAX;
-        let a = u32x4::new(x, 1, 2, 3);
-        let y = u16::MAX;
-        let b = u16x8::new(0, 0, 0, 0, y, y, y, y);
-        let y = y as u32;
-        let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
-        let r = u32x4::from(vaddw_high_u16(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    fn test_vaddw_high_u32() {
-        let x = u64::MAX;
-        let a = u64x2::new(x, 1);
-        let y = u32::MAX;
-        let b = u32x4::new(0, 0, y, y);
-        let y = y as u64;
-        let e = u64x2::new(x.wrapping_add(y), 1 + y);
-        let r = u64x2::from(vaddw_high_u32(a.into(), b.into()));
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     fn test_vmvn_s8() {
         let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
@@ -5766,42 +5198,9 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    macro_rules! test_vcombine {
-        ($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => {
-            #[allow(unused_assignments)]
-            #[simd_test(enable = "neon")]
-            fn $test_id() {
-                let a = Simd::from_array([$($a),*]);
-                let b = Simd::from_array([$($b),*]);
-                let e = Simd::from_array([$($a),* $(, $b)*]);
-                let c = $fn_id(a.into(), b.into());
-                let mut d = e;
-                d = c.into();
-                assert_eq!(d, e);
-            }
-        }
-    }
-
-    test_vcombine!(test_vcombine_s8 => vcombine_s8([3_i8, -4, 5, -6, 7, 8, 9, 10], [13_i8, -14, 15, -16, 17, 18, 19, 110]));
-    test_vcombine!(test_vcombine_u8 => vcombine_u8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110]));
-    test_vcombine!(test_vcombine_p8 => vcombine_p8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110]));
-
-    test_vcombine!(test_vcombine_s16 => vcombine_s16([3_i16, -4, 5, -6], [13_i16, -14, 15, -16]));
-    test_vcombine!(test_vcombine_u16 => vcombine_u16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16]));
-    test_vcombine!(test_vcombine_p16 => vcombine_p16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16]));
-
     #[cfg(not(target_arch = "arm64ec"))]
     mod fp16 {
         use super::*;
-        #[simd_test(enable = "neon,fp16")]
-        fn test_vcombine_f16() {
-            let a = f16x4::from_array([3_f16, 4., 5., 6.]);
-            let b = f16x4::from_array([13_f16, 14., 15., 16.]);
-            let e = f16x8::from_array([3_f16, 4., 5., 6., 13_f16, 14., 15., 16.]);
-            let c = f16x8::from(vcombine_f16(a.into(), b.into()));
-            assert_eq!(c, e);
-        }
-
         #[simd_test(enable = "neon,fp16")]
         fn test_vld1_lane_f16() {
             let a = f16x4::new(0., 1., 2., 3.);
@@ -5837,17 +5236,6 @@ mod tests {
         }
     }
 
-    test_vcombine!(test_vcombine_s32 => vcombine_s32([3_i32, -4], [13_i32, -14]));
-    test_vcombine!(test_vcombine_u32 => vcombine_u32([3_u32, 4], [13_u32, 14]));
-    // note: poly32x4 does not exist, and neither does vcombine_p32
-    test_vcombine!(test_vcombine_f32 => vcombine_f32([3_f32, -4.], [13_f32, -14.]));
-
-    test_vcombine!(test_vcombine_s64 => vcombine_s64([-3_i64], [13_i64]));
-    test_vcombine!(test_vcombine_u64 => vcombine_u64([3_u64], [13_u64]));
-    test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64]));
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64]));
-
     macro_rules! lane_wide_store_load_roundtrip {
         ($elem_ty:ty, $len:expr, $idx:expr, $vec_ty:ty, $store:ident, $load:ident) => {
             let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
diff --git a/library/stdarch/crates/stdarch-verify/tests/arm.rs b/library/stdarch/crates/stdarch-verify/tests/arm.rs
index 2242bf4264e57..6ba9c17c4804c 100644
--- a/library/stdarch/crates/stdarch-verify/tests/arm.rs
+++ b/library/stdarch/crates/stdarch-verify/tests/arm.rs
@@ -5145,4 +5145,128 @@ static SKIP_RUNTIME_TESTS: &'static [&'static str] = &[
     "vzipq_p16",
     "__rndr",
     "__rndrrs",
+    "vcopy_laneq_f64",
+    "vcopy_laneq_f64",
+    "vcopy_laneq_s64",
+    "vcopy_laneq_s64",
+    "vcopy_laneq_u64",
+    "vcopy_laneq_u64",
+    "vcopy_laneq_p64",
+    "vcopy_laneq_p64",
+    "vget_high_f64",
+    "vget_high_f64",
+    "vget_high_p64",
+    "vget_high_p64",
+    "vget_low_f64",
+    "vget_low_f64",
+    "vget_low_p64",
+    "vget_low_p64",
+    "vgetq_lane_f64",
+    "vgetq_lane_f64",
+    "vaddl_high_s16",
+    "vaddl_high_s16",
+    "vaddl_high_s32",
+    "vaddl_high_s32",
+    "vaddl_high_s8",
+    "vaddl_high_s8",
+    "vaddl_high_u16",
+    "vaddl_high_u16",
+    "vaddl_high_u32",
+    "vaddl_high_u32",
+    "vaddl_high_u8",
+    "vaddl_high_u8",
+    "vget_high_f32",
+    "vget_high_f32",
+    "vget_high_p16",
+    "vget_high_p16",
+    "vget_high_p8",
+    "vget_high_p8",
+    "vget_high_s16",
+    "vget_high_s16",
+    "vget_high_s32",
+    "vget_high_s32",
+    "vget_high_s8",
+    "vget_high_s8",
+    "vget_high_u16",
+    "vget_high_u16",
+    "vget_high_u32",
+    "vget_high_u32",
+    "vget_high_u8",
+    "vget_high_u8",
+    "vget_high_s64",
+    "vget_high_s64",
+    "vget_high_u64",
+    "vget_high_u64",
+    "vget_lane_f32",
+    "vget_lane_f32",
+    "vget_lane_p16",
+    "vget_lane_p16",
+    "vget_lane_p8",
+    "vget_lane_p8",
+    "vget_lane_s16",
+    "vget_lane_s16",
+    "vget_lane_s32",
+    "vget_lane_s32",
+    "vget_lane_s8",
+    "vget_lane_s8",
+    "vget_lane_u16",
+    "vget_lane_u16",
+    "vget_lane_u32",
+    "vget_lane_u32",
+    "vget_lane_u8",
+    "vget_lane_u8",
+    "vgetq_lane_f32",
+    "vgetq_lane_f32",
+    "vgetq_lane_p16",
+    "vgetq_lane_p16",
+    "vgetq_lane_p64",
+    "vgetq_lane_p64",
+    "vgetq_lane_p8",
+    "vgetq_lane_p8",
+    "vgetq_lane_s16",
+    "vgetq_lane_s16",
+    "vgetq_lane_s32",
+    "vgetq_lane_s32",
+    "vgetq_lane_s64",
+    "vgetq_lane_s64",
+    "vgetq_lane_s8",
+    "vgetq_lane_s8",
+    "vgetq_lane_u16",
+    "vgetq_lane_u16",
+    "vgetq_lane_u32",
+    "vgetq_lane_u32",
+    "vgetq_lane_u8",
+    "vgetq_lane_u8",
+    "vget_lane_p64",
+    "vget_lane_s64",
+    "vget_lane_u64",
+    "vget_low_f32",
+    "vget_low_f32",
+    "vget_low_p16",
+    "vget_low_p16",
+    "vget_low_p8",
+    "vget_low_p8",
+    "vget_low_s16",
+    "vget_low_s16",
+    "vget_low_s32",
+    "vget_low_s32",
+    "vget_low_s8",
+    "vget_low_s8",
+    "vget_low_u16",
+    "vget_low_u16",
+    "vget_low_u32",
+    "vget_low_u32",
+    "vget_low_u8",
+    "vget_low_u8",
+    "vget_low_s64",
+    "vget_low_s64",
+    "vget_low_u64",
+    "vget_low_u64",
+    "vaddw_high_s16",
+    "vaddw_high_s32",
+    "vaddw_high_s8",
+    "vaddw_high_u16",
+    "vaddw_high_u32",
+    "vaddw_high_u8",
+    "vgetq_lane_u64",
 ];

From 11fd314918e4c573c52354e1bf801c8c9b508ef1 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui@loongson.cn>
Date: Tue, 21 Apr 2026 22:34:57 +0800
Subject: [PATCH 12/78] loongarch: Use `intrinsics::simd` for vbit{clr,rev,set}

---
 .../src/loongarch64/lasx/generated.rs         | 108 ------------------
 .../src/loongarch64/lasx/portable.rs          |  12 ++
 .../src/loongarch64/lsx/generated.rs          | 108 ------------------
 .../core_arch/src/loongarch64/lsx/portable.rs |  12 ++
 .../crates/core_arch/src/loongarch64/simd.rs  |  18 +++
 .../crates/stdarch-gen-loongarch/lasx.spec    |  12 ++
 .../crates/stdarch-gen-loongarch/lsx.spec     |  12 ++
 .../src/portable-intrinsics.txt               |  24 ++++
 8 files changed, 90 insertions(+), 216 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
index c7dbd46480a42..29aed37e6b200 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
@@ -43,14 +43,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvsrlri_w(a: __v8i32, b: u32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvsrlri.d"]
     fn __lasx_xvsrlri_d(a: __v4i64, b: u32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvbitclr.b"]
-    fn __lasx_xvbitclr_b(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvbitclr.h"]
-    fn __lasx_xvbitclr_h(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvbitclr.w"]
-    fn __lasx_xvbitclr_w(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvbitclr.d"]
-    fn __lasx_xvbitclr_d(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvbitclri.b"]
     fn __lasx_xvbitclri_b(a: __v32u8, b: u32) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvbitclri.h"]
@@ -59,14 +51,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvbitclri_w(a: __v8u32, b: u32) -> __v8u32;
     #[link_name = "llvm.loongarch.lasx.xvbitclri.d"]
     fn __lasx_xvbitclri_d(a: __v4u64, b: u32) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvbitset.b"]
-    fn __lasx_xvbitset_b(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvbitset.h"]
-    fn __lasx_xvbitset_h(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvbitset.w"]
-    fn __lasx_xvbitset_w(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvbitset.d"]
-    fn __lasx_xvbitset_d(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvbitseti.b"]
     fn __lasx_xvbitseti_b(a: __v32u8, b: u32) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvbitseti.h"]
@@ -75,14 +59,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvbitseti_w(a: __v8u32, b: u32) -> __v8u32;
     #[link_name = "llvm.loongarch.lasx.xvbitseti.d"]
     fn __lasx_xvbitseti_d(a: __v4u64, b: u32) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvbitrev.b"]
-    fn __lasx_xvbitrev_b(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvbitrev.h"]
-    fn __lasx_xvbitrev_h(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvbitrev.w"]
-    fn __lasx_xvbitrev_w(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvbitrev.d"]
-    fn __lasx_xvbitrev_d(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvbitrevi.b"]
     fn __lasx_xvbitrevi_b(a: __v32u8, b: u32) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvbitrevi.h"]
@@ -1285,34 +1261,6 @@ pub fn lasx_xvsrlri_d<const IMM6: u32>(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvsrlri_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitclr_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitclr_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitclr_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitclr_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitclr_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitclr_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitclr_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitclr_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
@@ -1349,34 +1297,6 @@ pub fn lasx_xvbitclri_d<const IMM6: u32>(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvbitclri_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitset_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitset_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitset_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitset_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitset_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitset_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitset_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitset_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
@@ -1413,34 +1333,6 @@ pub fn lasx_xvbitseti_d<const IMM6: u32>(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvbitseti_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitrev_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitrev_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitrev_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitrev_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitrev_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitrev_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitrev_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitrev_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
index f2a7254d7947b..5f306502d9311 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
@@ -120,6 +120,18 @@ impl_vvv!("lasx", lasx_xvsrl_b, ls::simd_shr, m256i, u8x32);
 impl_vvv!("lasx", lasx_xvsrl_h, ls::simd_shr, m256i, u16x16);
 impl_vvv!("lasx", lasx_xvsrl_w, ls::simd_shr, m256i, u32x8);
 impl_vvv!("lasx", lasx_xvsrl_d, ls::simd_shr, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvbitclr_b, ls::simd_bitclr, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvbitclr_h, ls::simd_bitclr, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvbitclr_w, ls::simd_bitclr, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvbitclr_d, ls::simd_bitclr, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvbitset_b, ls::simd_bitset, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvbitset_h, ls::simd_bitset, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvbitset_w, ls::simd_bitset, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvbitset_d, ls::simd_bitset, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvbitrev_b, ls::simd_bitrev, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvbitrev_h, ls::simd_bitrev, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvbitrev_w, ls::simd_bitrev, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvbitrev_d, ls::simd_bitrev, m256i, u64x4);
 
 impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32);
 impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16);
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
index 4ec3cdf0c5abb..529dc605c22a7 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
@@ -43,14 +43,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vsrlri_w(a: __v4i32, b: u32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vsrlri.d"]
     fn __lsx_vsrlri_d(a: __v2i64, b: u32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vbitclr.b"]
-    fn __lsx_vbitclr_b(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vbitclr.h"]
-    fn __lsx_vbitclr_h(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vbitclr.w"]
-    fn __lsx_vbitclr_w(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vbitclr.d"]
-    fn __lsx_vbitclr_d(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vbitclri.b"]
     fn __lsx_vbitclri_b(a: __v16u8, b: u32) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vbitclri.h"]
@@ -59,14 +51,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vbitclri_w(a: __v4u32, b: u32) -> __v4u32;
     #[link_name = "llvm.loongarch.lsx.vbitclri.d"]
     fn __lsx_vbitclri_d(a: __v2u64, b: u32) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vbitset.b"]
-    fn __lsx_vbitset_b(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vbitset.h"]
-    fn __lsx_vbitset_h(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vbitset.w"]
-    fn __lsx_vbitset_w(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vbitset.d"]
-    fn __lsx_vbitset_d(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vbitseti.b"]
     fn __lsx_vbitseti_b(a: __v16u8, b: u32) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vbitseti.h"]
@@ -75,14 +59,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vbitseti_w(a: __v4u32, b: u32) -> __v4u32;
     #[link_name = "llvm.loongarch.lsx.vbitseti.d"]
     fn __lsx_vbitseti_d(a: __v2u64, b: u32) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vbitrev.b"]
-    fn __lsx_vbitrev_b(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vbitrev.h"]
-    fn __lsx_vbitrev_h(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vbitrev.w"]
-    fn __lsx_vbitrev_w(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vbitrev.d"]
-    fn __lsx_vbitrev_d(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vbitrevi.b"]
     fn __lsx_vbitrevi_b(a: __v16u8, b: u32) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vbitrevi.h"]
@@ -1197,34 +1173,6 @@ pub fn lsx_vsrlri_d<const IMM6: u32>(a: m128i) -> m128i {
     unsafe { transmute(__lsx_vsrlri_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclr_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitclr_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclr_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitclr_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclr_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitclr_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclr_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitclr_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
@@ -1261,34 +1209,6 @@ pub fn lsx_vbitclri_d<const IMM6: u32>(a: m128i) -> m128i {
     unsafe { transmute(__lsx_vbitclri_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitset_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitset_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitset_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitset_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitset_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitset_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitset_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitset_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
@@ -1325,34 +1245,6 @@ pub fn lsx_vbitseti_d<const IMM6: u32>(a: m128i) -> m128i {
     unsafe { transmute(__lsx_vbitseti_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrev_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitrev_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrev_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitrev_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrev_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitrev_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrev_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitrev_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
index 1980000c3d4dd..f6b8daae4e00b 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
@@ -120,6 +120,18 @@ impl_vvv!("lsx", lsx_vsrl_b, ls::simd_shr, m128i, u8x16);
 impl_vvv!("lsx", lsx_vsrl_h, ls::simd_shr, m128i, u16x8);
 impl_vvv!("lsx", lsx_vsrl_w, ls::simd_shr, m128i, u32x4);
 impl_vvv!("lsx", lsx_vsrl_d, ls::simd_shr, m128i, u64x2);
+impl_vvv!("lsx", lsx_vbitclr_b, ls::simd_bitclr, m128i, u8x16);
+impl_vvv!("lsx", lsx_vbitclr_h, ls::simd_bitclr, m128i, u16x8);
+impl_vvv!("lsx", lsx_vbitclr_w, ls::simd_bitclr, m128i, u32x4);
+impl_vvv!("lsx", lsx_vbitclr_d, ls::simd_bitclr, m128i, u64x2);
+impl_vvv!("lsx", lsx_vbitset_b, ls::simd_bitset, m128i, u8x16);
+impl_vvv!("lsx", lsx_vbitset_h, ls::simd_bitset, m128i, u16x8);
+impl_vvv!("lsx", lsx_vbitset_w, ls::simd_bitset, m128i, u32x4);
+impl_vvv!("lsx", lsx_vbitset_d, ls::simd_bitset, m128i, u64x2);
+impl_vvv!("lsx", lsx_vbitrev_b, ls::simd_bitrev, m128i, u8x16);
+impl_vvv!("lsx", lsx_vbitrev_h, ls::simd_bitrev, m128i, u16x8);
+impl_vvv!("lsx", lsx_vbitrev_w, ls::simd_bitrev, m128i, u32x4);
+impl_vvv!("lsx", lsx_vbitrev_d, ls::simd_bitrev, m128i, u64x2);
 
 impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16);
 impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8);
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/simd.rs b/library/stdarch/crates/core_arch/src/loongarch64/simd.rs
index 959e2d0cb68f5..6f573f56d690f 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/simd.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/simd.rs
@@ -51,6 +51,24 @@ pub(super) const unsafe fn simd_andn<T: Copy + const SimdExt>(a: T, b: T) -> T {
     is::simd_and(ls::simd_not(a), b)
 }
 
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_bitclr<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    ls::simd_andn(ls::simd_shl(ls::simd_splat(1), b), a)
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_bitrev<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    is::simd_xor(ls::simd_shl(ls::simd_splat(1), b), a)
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_bitset<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    is::simd_or(ls::simd_shl(ls::simd_splat(1), b), a)
+}
+
 #[inline(always)]
 #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
 pub(super) const unsafe fn simd_fmsub<T: Copy>(a: T, b: T, c: T) -> T {
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
index 9a9b1a143eff0..93317fca6b60f 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
+++ b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
@@ -228,21 +228,25 @@ asm-fmts = xd, xj, ui6
 data-types = V4DI, V4DI, UQI
 
 /// lasx_xvbitclr_b
+impl = portable
 name = lasx_xvbitclr_b
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvbitclr_h
+impl = portable
 name = lasx_xvbitclr_h
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvbitclr_w
+impl = portable
 name = lasx_xvbitclr_w
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvbitclr_d
+impl = portable
 name = lasx_xvbitclr_d
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
@@ -268,21 +272,25 @@ asm-fmts = xd, xj, ui6
 data-types = UV4DI, UV4DI, UQI
 
 /// lasx_xvbitset_b
+impl = portable
 name = lasx_xvbitset_b
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvbitset_h
+impl = portable
 name = lasx_xvbitset_h
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvbitset_w
+impl = portable
 name = lasx_xvbitset_w
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvbitset_d
+impl = portable
 name = lasx_xvbitset_d
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
@@ -308,21 +316,25 @@ asm-fmts = xd, xj, ui6
 data-types = UV4DI, UV4DI, UQI
 
 /// lasx_xvbitrev_b
+impl = portable
 name = lasx_xvbitrev_b
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvbitrev_h
+impl = portable
 name = lasx_xvbitrev_h
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvbitrev_w
+impl = portable
 name = lasx_xvbitrev_w
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvbitrev_d
+impl = portable
 name = lasx_xvbitrev_d
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
index 8fd267889988e..274ebe4144e9f 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
+++ b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
@@ -228,21 +228,25 @@ asm-fmts = vd, vj, ui6
 data-types = V2DI, V2DI, UQI
 
 /// lsx_vbitclr_b
+impl = portable
 name = lsx_vbitclr_b
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vbitclr_h
+impl = portable
 name = lsx_vbitclr_h
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vbitclr_w
+impl = portable
 name = lsx_vbitclr_w
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vbitclr_d
+impl = portable
 name = lsx_vbitclr_d
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
@@ -268,21 +272,25 @@ asm-fmts = vd, vj, ui6
 data-types = UV2DI, UV2DI, UQI
 
 /// lsx_vbitset_b
+impl = portable
 name = lsx_vbitset_b
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vbitset_h
+impl = portable
 name = lsx_vbitset_h
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vbitset_w
+impl = portable
 name = lsx_vbitset_w
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vbitset_d
+impl = portable
 name = lsx_vbitset_d
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
@@ -308,21 +316,25 @@ asm-fmts = vd, vj, ui6
 data-types = UV2DI, UV2DI, UQI
 
 /// lsx_vbitrev_b
+impl = portable
 name = lsx_vbitrev_b
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vbitrev_h
+impl = portable
 name = lsx_vbitrev_h
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vbitrev_w
+impl = portable
 name = lsx_vbitrev_w
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vbitrev_d
+impl = portable
 name = lsx_vbitrev_d
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
index 2d5e9817a31c8..f78f594cb9392 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
+++ b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
@@ -191,6 +191,18 @@ lsx_vrepli_b
 lsx_vrepli_h
 lsx_vrepli_w
 lsx_vrepli_d
+lsx_vbitclr_b
+lsx_vbitclr_h
+lsx_vbitclr_w
+lsx_vbitclr_d
+lsx_vbitset_b
+lsx_vbitset_h
+lsx_vbitset_w
+lsx_vbitset_d
+lsx_vbitrev_b
+lsx_vbitrev_h
+lsx_vbitrev_w
+lsx_vbitrev_d
 
 # LASX intrinsics
 lasx_xvsll_b
@@ -379,3 +391,15 @@ lasx_xvrepli_b
 lasx_xvrepli_h
 lasx_xvrepli_w
 lasx_xvrepli_d
+lasx_xvbitclr_b
+lasx_xvbitclr_h
+lasx_xvbitclr_w
+lasx_xvbitclr_d
+lasx_xvbitset_b
+lasx_xvbitset_h
+lasx_xvbitset_w
+lasx_xvbitset_d
+lasx_xvbitrev_b
+lasx_xvbitrev_h
+lasx_xvbitrev_w
+lasx_xvbitrev_d

From 1e13d52fafb04b7e3b26d5e36770d76c77b3cb5c Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Mon, 20 Apr 2026 14:21:22 +0100
Subject: [PATCH 13/78] gen-arm: change defn of `vabdl_high_u{8,16,32}`

Changes the definition of `vabdl_high_u{8,16,32}` to match that of
`vabdl_high_s{8,16,32}` so that the `big_endian_inverse` transformation
can apply.
---
 .../core_arch/src/aarch64/neon/generated.rs   |  26 ++---
 .../spec/neon/aarch64.spec.yml                | 102 ++++++++++++++++--
 2 files changed, 107 insertions(+), 21 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 8e3493dfac128..3f31996655354 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -268,19 +268,6 @@ pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
     }
 }
 #[doc = "Unsigned Absolute difference Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
-pub fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        simd_cast(vabd_u8(c, d))
-    }
-}
-#[doc = "Unsigned Absolute difference Long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -306,6 +293,19 @@ pub fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
         simd_cast(vabd_u32(c, d))
     }
 }
+#[doc = "Unsigned Absolute difference Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
+pub fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
+    unsafe {
+        let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+        let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+        simd_cast(vabd_u8(c, d))
+    }
+}
 #[doc = "Floating-point absolute value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f64)"]
 #[inline]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 163145e7ba4d6..bc71eac8d1325 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -11470,8 +11470,8 @@ intrinsics:
                   - FnCall: ["vdup_n_{type[0]}", [b]]
             - '0'
 
-  - name: "vabdl_high_{neon_type[0]}"
-    doc: "Unsigned Absolute difference Long"
+  - name: "vabdl_high{neon_type[0].noq}"
+    doc: Unsigned Absolute difference Long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
@@ -11479,13 +11479,99 @@ intrinsics:
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uabdl2]]}]]
     safety: safe
     types:
-      - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]']
-      - [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]']
+      - [uint8x16_t, uint16x8_t, uint8x8_t]
     compose:
-      - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]}]
-      - Let: [d, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
-      - FnCall: [simd_cast, [{FnCall: ["vabd_{neon_type[0]}", [c, d]]}]]
+      - Let:
+          - c
+          - "{neon_type[2]}"
+          - FnCall:
+              - simd_shuffle!
+              - - a
+                - a
+                - [8, 9, 10, 11, 12, 13, 14, 15]
+      - Let:
+          - d
+          - "{neon_type[2]}"
+          - FnCall:
+              - simd_shuffle!
+              - - b
+                - b
+                - [8, 9, 10, 11, 12, 13, 14, 15]
+      - FnCall:
+          - simd_cast
+          - - FnCall:
+                - "vabd_{neon_type[0]}"
+                - - c
+                  - d
+
+  - name: "vabdl_high{neon_type[0].noq}"
+    doc: Unsigned Absolute difference Long
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uabdl2]]}]]
+    safety: safe
+    types:
+      - [uint16x8_t, uint32x4_t, uint16x4_t]
+    compose:
+      - Let:
+          - c
+          - "{neon_type[2]}"
+          - FnCall:
+              - simd_shuffle!
+              - - a
+                - a
+                - [4, 5, 6, 7]
+      - Let:
+          - d
+          - "{neon_type[2]}"
+          - FnCall:
+              - simd_shuffle!
+              - - b
+                - b
+                - [4, 5, 6, 7]
+
+      - FnCall:
+          - simd_cast
+          - - FnCall:
+                - "vabd_{neon_type[0]}"
+                - - c
+                  - d
+
+  - name: "vabdl_high{neon_type[0].noq}"
+    doc: Unsigned Absolute difference Long
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uabdl2]]}]]
+    safety: safe
+    types:
+      - [uint32x4_t, uint64x2_t, uint32x2_t, int32x2_t]
+    compose:
+      - Let:
+          - c
+          - "{neon_type[2]}"
+          - FnCall:
+              - simd_shuffle!
+              - - a
+                - a
+                - [2, 3]
+      - Let:
+          - d
+          - "{neon_type[2]}"
+          - FnCall:
+              - simd_shuffle!
+              - - b
+                - b
+                - [2, 3]
+      - FnCall:
+          - simd_cast
+          - - FnCall:
+                - "vabd_{neon_type[0]}"
+                - - c
+                  - d
 
   - name: "vfms_n_f64"
     doc: "Floating-point fused Multiply-subtract to accumulator(vector)"

From a5073bcca9f749516b9b00de873eac544196ef8c Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Wed, 29 Apr 2026 12:55:45 +0100
Subject: [PATCH 14/78] gen-arm: write `vtbl{1,2,3,4}` in terms of `vqtbl`

Writing these intrinsics in terms of the `vqtbl_$ty` functions rather
than the `vqtbl` helper function preserves the behaviour while making
them work in big endian.
---
 .../core_arch/src/aarch64/neon/generated.rs   |  18 +--
 .../spec/neon/aarch64.spec.yml                | 149 ++++++++----------
 2 files changed, 74 insertions(+), 93 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 3f31996655354..00008c6ce60e0 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -25952,7 +25952,7 @@ pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
-    unsafe { vqtbl1(transmute(vcombine_s8(a.0, a.1)), transmute(b)) }
+    vqtbl1_s8(vcombine_s8(a.0, a.1), vreinterpret_u8_s8(b))
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
@@ -25961,7 +25961,7 @@ pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { transmute(vqtbl1(transmute(vcombine_u8(a.0, a.1)), b)) }
+    vqtbl1_u8(vcombine_u8(a.0, a.1), b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
@@ -25970,7 +25970,7 @@ pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
-    unsafe { transmute(vqtbl1(transmute(vcombine_p8(a.0, a.1)), b)) }
+    vqtbl1_p8(vcombine_p8(a.0, a.1), b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"]
@@ -25983,7 +25983,7 @@ pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
         vcombine_s8(a.0, a.1),
         vcombine_s8(a.2, unsafe { crate::mem::zeroed() }),
     );
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), transmute(b))) }
+    vqtbl2_s8(x, vreinterpret_u8_s8(b))
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
@@ -25996,7 +25996,7 @@ pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
         vcombine_u8(a.0, a.1),
         vcombine_u8(a.2, unsafe { crate::mem::zeroed() }),
     );
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) }
+    vqtbl2_u8(x, b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
@@ -26009,7 +26009,7 @@ pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
         vcombine_p8(a.0, a.1),
         vcombine_p8(a.2, unsafe { crate::mem::zeroed() }),
     );
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) }
+    vqtbl2_p8(x, b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"]
@@ -26019,7 +26019,7 @@ pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
     let x = int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3));
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), transmute(b))) }
+    vqtbl2_s8(x, vreinterpret_u8_s8(b))
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
@@ -26029,7 +26029,7 @@ pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
     let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3));
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) }
+    vqtbl2_u8(x, b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
@@ -26039,7 +26039,7 @@ pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
     let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3));
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) }
+    vqtbl2_p8(x, b)
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index bc71eac8d1325..26b31b1e4c295 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -12273,6 +12273,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: false
     types:
       - [int8x8_t, 'int8x8_t', 'unsafe {{ transmute(b) }}']
       - [uint8x8_t, 'uint8x8_t', 'b']
@@ -12286,26 +12287,22 @@ intrinsics:
                   - 'unsafe {{ crate::mem::zeroed() }}'
             - Identifier: ['{type[2]}', Symbol]
 
-  - name: "vtbl2{neon_type[1].noq}"
+  - name: "vtbl2{neon_type[2].no}"
     doc: "Table look-up"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
-    return_type: "{neon_type[1]}"
+    return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
+    big_endian_inverse: false
     safety: safe
     types:
-      - [int8x8x2_t, 'int8x8_t']
+      - ['int8x8x2_t',  'int8x8_t',  'int8x8_t']
     compose:
       - FnCall:
-          - vqtbl1
-          - - FnCall:
-                - transmute
-                - - FnCall:
-                      - 'vcombine{neon_type[1].noq}'
-                      - - 'a.0'
-                        - 'a.1'
-            - FnCall: [transmute, [b]]
+        - 'vqtbl1{neon_type[2].noq}'
+        - - FnCall: ['vcombine{neon_type[2].noq}', ['a.0', 'a.1']]
+          - FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]]
 
   - name: "vtbl2{neon_type[2].no}"
     doc: "Table look-up"
@@ -12317,124 +12314,108 @@ intrinsics:
     big_endian_inverse: false
     safety: safe
     types:
-      - [uint8x8x2_t, 'uint8x8_t', 'uint8x8_t']
-      - [poly8x8x2_t, 'uint8x8_t', 'poly8x8_t']
+      - ['uint8x8x2_t', 'uint8x8_t', 'uint8x8_t']
+      - ['poly8x8x2_t', 'uint8x8_t', 'poly8x8_t']
     compose:
       - FnCall:
-          - transmute
-          - - FnCall:
-                - vqtbl1
-                - - FnCall:
-                      - transmute
-                      - - FnCall:
-                            - 'vcombine{neon_type[2].noq}'
-                            - - 'a.0'
-                              - 'a.1'
-                  - b
+          - 'vqtbl1{neon_type[2].noq}'
+          - - FnCall: ['vcombine{neon_type[2].noq}', ['a.0', 'a.1']]
+            - b
 
   - name: "vtbl3{neon_type[1].no}"
     doc: "Table look-up"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
-    return_type: "{neon_type[1]}"
+    return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: false
     types:
-      - [int8x8x3_t, 'int8x8_t', 'int8x16x2']
+      - ['int8x8x3_t', 'int8x8_t', 'int8x8_t', 'int8x16x2']
     compose:
       - Let:
-          - x
-          - FnCall:
-              - '{type[2]}_t'
-              - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
-                - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
+        - x
+        - FnCall:
+          - '{type[3]}_t'
+          - - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']]
+            - FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
       - FnCall:
-          - transmute
-          - - FnCall:
-                - vqtbl2
-                - - FnCall: [transmute, ['x.0']]
-                  - FnCall: [transmute, ['x.1']]
-                  - FnCall: [transmute, [b]]
+        - 'vqtbl2{neon_type[2].no}'
+        - - x
+          - FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]]
+
 
-  - name: "vtbl3{neon_type[3].no}"
+  - name: "vtbl3{neon_type[2].no}"
     doc: "Table look-up"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
-    return_type: "{neon_type[3]}"
+    return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
-    types:
-      - [uint8x8x3_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
-      - [poly8x8x3_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t']
     big_endian_inverse: false
+    types:
+      - [uint8x8x3_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2']
+      - [poly8x8x3_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2']
     compose:
       - Let:
-          - x
-          - FnCall:
-              - '{type[2]}_t'
-              - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']]
-                - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
-      - FnCall:
-          - transmute
-          - - FnCall:
-                - vqtbl2
-                - - FnCall: [transmute, ['x.0']]
-                  - FnCall: [transmute, ['x.1']]
-                  - b
+        - x
+        - FnCall:
+          - '{type[3]}_t'
+          - - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']]
+            - FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
+      - FnCall:
+        - 'vqtbl2{neon_type[2].no}'
+        - - x
+          - b
 
-  - name: "vtbl4{neon_type[1].no}"
+  - name: "vtbl4{neon_type[2].no}"
     doc: "Table look-up"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
-    return_type: "{neon_type[1]}"
+    return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: false
     types:
-      - [int8x8x4_t, 'int8x8_t', 'int8x16x2']
+      - ['int8x8x4_t', 'int8x8_t', 'int8x8_t', 'int8x16x2']
     compose:
       - Let:
-          - x
-          - FnCall:
-              - '{type[2]}_t'
-              - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
-                - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'a.3']]
+        - x
+        - FnCall:
+            - '{type[3]}_t'
+            - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
+              - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'a.3']]
       - FnCall:
-          - transmute
-          - - FnCall:
-                - 'vqtbl2'
-                - - FnCall: [transmute, ['x.0']]
-                  - FnCall: [transmute, ['x.1']]
-                  - FnCall: [transmute, [b]]
+        - 'vqtbl2{neon_type[2].no}'
+        - - x
+          - FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]]
 
-  - name: "vtbl4{neon_type[3].no}"
+  - name: "vtbl4{neon_type[2].no}"
     doc: "Table look-up"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
-    return_type: "{neon_type[3]}"
+    return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
-    types:
-      - [uint8x8x4_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
-      - [poly8x8x4_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t']
     big_endian_inverse: false
+    types:
+      - [uint8x8x4_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2']
+      - [poly8x8x4_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2']
     compose:
       - Let:
-          - x
-          - FnCall:
-              - '{type[2]}_t'
-              - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']]
-                - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'a.3']]
-      - FnCall:
-          - transmute
-          - - FnCall:
-                - 'vqtbl2'
-                - - FnCall: [transmute, ['x.0']]
-                  - FnCall: [transmute, ['x.1']]
-                  - b
+        - x
+        - FnCall:
+            - '{type[3]}_t'
+            - - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']]
+              - FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'a.3']]
+      - FnCall:
+        - 'vqtbl2{neon_type[2].no}'
+        - - x
+          - b
 
   - name: "vqtbx1{neon_type[0].no}"
     doc: "Extended table look-up"

From 62182018068297e48c940325d5d53d542c269a6d Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Thu, 30 Apr 2026 14:32:12 +0100
Subject: [PATCH 15/78] gen-arm: write `vtbx{1,2,3,4}` in terms of `vqtbx`

Writing these intrinsics in terms of the `vqtbx_$ty` functions rather
than the `vqtbx` helper function preserves the behaviour while making
them work in big endian.
---
 .../core_arch/src/aarch64/neon/generated.rs   | 109 ++++----
 .../spec/neon/aarch64.spec.yml                | 234 ++++++++++--------
 2 files changed, 192 insertions(+), 151 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 00008c6ce60e0..cca3879571d2f 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -26051,11 +26051,11 @@ pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
     unsafe {
         simd_select(
             simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(8))),
-            transmute(vqtbx1(
-                transmute(a),
-                transmute(vcombine_s8(b, crate::mem::zeroed())),
-                transmute(c),
-            )),
+            vqtbx1_s8(
+                a,
+                vcombine_s8(b, crate::mem::zeroed()),
+                vreinterpret_u8_s8(c),
+            ),
             a,
         )
     }
@@ -26070,11 +26070,7 @@ pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     unsafe {
         simd_select(
             simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
-            transmute(vqtbx1(
-                transmute(a),
-                transmute(vcombine_u8(b, crate::mem::zeroed())),
-                c,
-            )),
+            vqtbx1_u8(a, vcombine_u8(b, crate::mem::zeroed()), c),
             a,
         )
     }
@@ -26089,11 +26085,7 @@ pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
     unsafe {
         simd_select(
             simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
-            transmute(vqtbx1(
-                transmute(a),
-                transmute(vcombine_p8(b, crate::mem::zeroed())),
-                c,
-            )),
+            vqtbx1_p8(a, vcombine_p8(b, crate::mem::zeroed()), c),
             a,
         )
     }
@@ -26105,7 +26097,13 @@ pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
-    unsafe { vqtbx1(transmute(a), transmute(vcombine_s8(b.0, b.1)), transmute(c)) }
+    unsafe {
+        simd_select(
+            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(16))),
+            vqtbx1_s8(a, vcombine_s8(b.0, b.1), vreinterpret_u8_s8(c)),
+            a,
+        )
+    }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
@@ -26114,7 +26112,13 @@ pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe { transmute(vqtbx1(transmute(a), transmute(vcombine_u8(b.0, b.1)), c)) }
+    unsafe {
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(16))),
+            vqtbx1_u8(a, vcombine_u8(b.0, b.1), c),
+            a,
+        )
+    }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
@@ -26123,7 +26127,13 @@ pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
-    unsafe { transmute(vqtbx1(transmute(a), transmute(vcombine_p8(b.0, b.1)), c)) }
+    unsafe {
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(16))),
+            vqtbx1_p8(a, vcombine_p8(b.0, b.1), c),
+            a,
+        )
+    }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"]
@@ -26137,16 +26147,11 @@ pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
         vcombine_s8(b.2, unsafe { crate::mem::zeroed() }),
     );
     unsafe {
-        transmute(simd_select(
-            simd_lt::<int8x8_t, int8x8_t>(transmute(c), transmute(i8x8::splat(24))),
-            transmute(vqtbx2(
-                transmute(a),
-                transmute(x.0),
-                transmute(x.1),
-                transmute(c),
-            )),
+        simd_select(
+            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(24))),
+            vqtbx2_s8(a, x, vreinterpret_u8_s8(c)),
             a,
-        ))
+        )
     }
 }
 #[doc = "Extended table look-up"]
@@ -26161,11 +26166,11 @@ pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
         vcombine_u8(b.2, unsafe { crate::mem::zeroed() }),
     );
     unsafe {
-        transmute(simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(transmute(c), transmute(u8x8::splat(24))),
-            transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)),
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(24))),
+            vqtbx2_u8(a, x, c),
             a,
-        ))
+        )
     }
 }
 #[doc = "Extended table look-up"]
@@ -26180,11 +26185,11 @@ pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
         vcombine_p8(b.2, unsafe { crate::mem::zeroed() }),
     );
     unsafe {
-        transmute(simd_select(
-            simd_lt::<poly8x8_t, int8x8_t>(transmute(c), transmute(u8x8::splat(24))),
-            transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)),
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(24))),
+            vqtbx2_p8(a, x, c),
             a,
-        ))
+        )
     }
 }
 #[doc = "Extended table look-up"]
@@ -26194,12 +26199,12 @@ pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
+    let x = int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, b.3));
     unsafe {
-        vqtbx2(
-            transmute(a),
-            transmute(vcombine_s8(b.0, b.1)),
-            transmute(vcombine_s8(b.2, b.3)),
-            transmute(c),
+        simd_select(
+            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(32))),
+            vqtbx2_s8(a, x, vreinterpret_u8_s8(c)),
+            a,
         )
     }
 }
@@ -26210,13 +26215,13 @@ pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
+    let x = uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, b.3));
     unsafe {
-        transmute(vqtbx2(
-            transmute(a),
-            transmute(vcombine_u8(b.0, b.1)),
-            transmute(vcombine_u8(b.2, b.3)),
-            c,
-        ))
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(32))),
+            vqtbx2_u8(a, x, c),
+            a,
+        )
     }
 }
 #[doc = "Extended table look-up"]
@@ -26226,13 +26231,13 @@ pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
+    let x = poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, b.3));
     unsafe {
-        transmute(vqtbx2(
-            transmute(a),
-            transmute(vcombine_p8(b.0, b.1)),
-            transmute(vcombine_p8(b.2, b.3)),
-            c,
-        ))
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(32))),
+            vqtbx2_p8(a, x, c),
+            a,
+        )
     }
 }
 #[doc = "Transpose vectors"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 26b31b1e4c295..9cceacec9db42 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -12224,20 +12224,29 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
+    big_endian_inverse: false
     safety: safe
     types:
-      - [int8x8_t, int8x8x4_t]
+      - [int8x8_t, 'int8x8x4_t', 'int8x16x2', 'int8x8', 'i8x8::splat(32)']
     compose:
+      - Let:
+        - x
+        - FnCall:
+            - '{type[2]}_t'
+            - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
+              - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'b.3']]
       - FnCall:
-          - "vqtbx2"
-          - - FnCall: [transmute, [a]]
-            - FnCall:
-                - transmute
-                - - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]]
-            - FnCall:
-                - transmute
-                - - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]]
-            - FnCall: [transmute, [c]]
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[3]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[4]}"]]
+          - FnCall:
+              - 'vqtbx2{neon_type[0].no}'
+              - - a
+                - x
+                - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
+          - a
 
   - name: "vtbx4{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12249,21 +12258,27 @@ intrinsics:
     big_endian_inverse: false
     safety: safe
     types:
-      - [uint8x8_t, uint8x8x4_t, uint8x8_t]
-      - [poly8x8_t, poly8x8x4_t, uint8x8_t]
+      - [uint8x8_t, 'uint8x8x4_t', 'uint8x8_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(32)']
+      - [poly8x8_t, 'poly8x8x4_t', 'uint8x8_t', 'poly8x16x2', 'uint8x8', 'u8x8::splat(32)']
     compose:
+      - Let:
+        - x
+        - FnCall:
+            - '{type[3]}_t'
+            - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
+              - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'b.3']]
       - FnCall:
-          - transmute
-          - - FnCall:
-                - "vqtbx2"
-                - - FnCall: [transmute, [a]]
-                  - FnCall:
-                      - transmute
-                      - - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]]
-                  - FnCall:
-                      - transmute
-                      - - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]]
-                  - c
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[4]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[5]}"]]
+          - FnCall:
+              - 'vqtbx2{neon_type[0].no}'
+              - - a
+                - x
+                - c
+          - a
 
   - name: "vtbl1{neon_type[0].no}"
     doc: "Table look-up"
@@ -12461,28 +12476,49 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
+    big_endian_inverse: false
     safety: safe
     types:
-      - [int8x8_t, "int8x8_t", "transmute(c)", "i8x8::splat(8)", "int8x8"]
-      - [uint8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"]
-      - [poly8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"]
+      - [int8x8_t, "int8x8_t", "int8x8", "i8x8::splat(8)"]
     compose:
       - FnCall:
-          - simd_select
-          - - FnCall:
-                - "simd_lt::<{type[4]}_t, int8x8_t>"
-                - - c
-                  - FnCall: [transmute, ["{type[3]}"]]
-            - FnCall:
-                - transmute
-                - - FnCall:
-                      - "vqtbx1"
-                      - - "transmute(a)"
-                        - FnCall:
-                            - transmute
-                            - - FnCall: ["vcombine{neon_type[0].no}", [b, "crate::mem::zeroed()"]]
-                        - "{type[2]}"
-            - a
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[2]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[3]}"]]
+          - FnCall:
+            - 'vqtbx1{neon_type[0].no}'
+            - - a
+              - FnCall: ['vcombine{neon_type[0].no}', [b, 'crate::mem::zeroed()']]
+              - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
+          - a
+
+  - name: "vtbx1{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - *neon-stable
+    big_endian_inverse: false
+    safety: safe
+    types:
+      - [uint8x8_t, "uint8x8_t", "uint8x8", "u8x8::splat(8)"]
+      - [poly8x8_t, "uint8x8_t", "uint8x8", "u8x8::splat(8)"]
+    compose:
+      - FnCall:
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[2]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[3]}"]]
+          - FnCall:
+            - 'vqtbx1{neon_type[0].no}'
+            - - a
+              - FnCall: ['vcombine{neon_type[0].no}', [b, 'crate::mem::zeroed()']]
+              - c
+          - a
 
   - name: "vtbx2{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12491,17 +12527,23 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
+    big_endian_inverse: false
     safety: safe
     types:
-      - [int8x8_t, 'int8x8x2_t']
+      - [int8x8_t, 'int8x8x2_t', 'int8x8', 'i8x8::splat(16)']
     compose:
       - FnCall:
-          - vqtbx1
-          - - FnCall: [transmute, [a]]
-            - FnCall:
-                - transmute
-                - - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
-            - FnCall: [transmute, [c]]
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[2]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[3]}"]]
+          - FnCall:
+              - 'vqtbx1{neon_type[0].no}'
+              - - a
+                - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
+                - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
+          - a
 
   - name: "vtbx2{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12513,18 +12555,21 @@ intrinsics:
     big_endian_inverse: false
     safety: safe
     types:
-      - [uint8x8_t, 'uint8x8x2_t', uint8x8_t]
-      - [poly8x8_t, 'poly8x8x2_t', uint8x8_t]
+      - [uint8x8_t, 'uint8x8x2_t', uint8x8_t, 'uint8x8', 'u8x8::splat(16)']
+      - [poly8x8_t, 'poly8x8x2_t', uint8x8_t, 'uint8x8', 'u8x8::splat(16)']
     compose:
       - FnCall:
-          - transmute
-          - - FnCall:
-                - vqtbx1
-                - - FnCall: [transmute, [a]]
-                  - FnCall:
-                      - transmute
-                      - - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
-                  - c
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[3]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[4]}"]]
+          - FnCall:
+              - 'vqtbx1{neon_type[0].no}'
+              - - a
+                - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
+                - c
+          - a
 
   - name: "vtbx3{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12533,33 +12578,29 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
+    big_endian_inverse: false
     safety: safe
     types:
-      - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'i8x8::splat(24)', 'int8x8']
+      - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'int8x8', 'i8x8::splat(24)']
     compose:
       - Let:
-         - x
-         - FnCall:
-             - '{type[2]}_t'
-             - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
-               - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']]
+        - x
+        - FnCall:
+            - '{type[2]}_t'
+            - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
+              - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']]
       - FnCall:
-          - transmute
-          - - FnCall:
-                - simd_select
-                - - FnCall:
-                      - 'simd_lt::<{type[4]}_t, int8x8_t>'
-                      - - FnCall: [transmute, [c]]
-                        - FnCall: [transmute, ['{type[3]}']]
-                  - FnCall:
-                      - transmute
-                      - - FnCall:
-                            - 'vqtbx2'
-                            - - FnCall: [transmute, [a]]
-                              - FnCall: [transmute, ['x.0']]
-                              - FnCall: [transmute, ['x.1']]
-                              - FnCall: [transmute, [c]]
-                  - a
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[3]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[4]}"]]
+          - FnCall:
+              - 'vqtbx2{neon_type[0].no}'
+              - - a
+                - x
+                - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
+          - a
 
   - name: "vtbx3{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12568,11 +12609,11 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
+    big_endian_inverse: false
     safety: safe
     types:
-      - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'u8x8::splat(24)', 'uint8x8']
-      - [poly8x8_t, 'poly8x8x3_t', 'poly8x16x2', 'u8x8::splat(24)', 'poly8x8']
-    big_endian_inverse: false
+      - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(24)']
+      - [poly8x8_t, 'poly8x8x3_t', 'poly8x16x2', 'uint8x8', 'u8x8::splat(24)']
     compose:
       - Let:
          - x
@@ -12581,22 +12622,17 @@ intrinsics:
              - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
                - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']]
       - FnCall:
-          - transmute
-          - - FnCall:
-                - simd_select
-                - - FnCall:
-                      - 'simd_lt::<{type[4]}_t, int8x8_t>'
-                      - - FnCall: [transmute, [c]]
-                        - FnCall: [transmute, ['{type[3]}']]
-                  - FnCall:
-                      - transmute
-                      - - FnCall:
-                            - 'vqtbx2'
-                            - - FnCall: [transmute, [a]]
-                              - FnCall: [transmute, ['x.0']]
-                              - FnCall: [transmute, ['x.1']]
-                              - c
-                  - a
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[3]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[4]}"]]
+          - FnCall:
+              - 'vqtbx2{neon_type[0].no}'
+              - - a
+                - x
+                - c
+          - a
 
   - name: "vqtbl1{neon_type[3].no}"
     doc: "Table look-up"

From c80d077ffc70f74121dc3fbe71be7bc60d684186 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui@loongson.cn>
Date: Fri, 24 Apr 2026 23:59:46 +0800
Subject: [PATCH 16/78] loongarch: Use `intrinsics::simd` for vs{add,sub}

---
 .../src/loongarch64/lasx/generated.rs         | 144 ------------------
 .../src/loongarch64/lasx/portable.rs          |  16 ++
 .../src/loongarch64/lsx/generated.rs          | 144 ------------------
 .../core_arch/src/loongarch64/lsx/portable.rs |  16 ++
 .../crates/stdarch-gen-loongarch/lasx.spec    |  16 ++
 .../crates/stdarch-gen-loongarch/lsx.spec     |  16 ++
 .../src/portable-intrinsics.txt               |  32 ++++
 7 files changed, 96 insertions(+), 288 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
index 29aed37e6b200..387a3e564c530 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
@@ -99,22 +99,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvadda_w(a: __v8i32, b: __v8i32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvadda.d"]
     fn __lasx_xvadda_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.b"]
-    fn __lasx_xvsadd_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.h"]
-    fn __lasx_xvsadd_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.w"]
-    fn __lasx_xvsadd_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.d"]
-    fn __lasx_xvsadd_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.bu"]
-    fn __lasx_xvsadd_bu(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.hu"]
-    fn __lasx_xvsadd_hu(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.wu"]
-    fn __lasx_xvsadd_wu(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.du"]
-    fn __lasx_xvsadd_du(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvavg.b"]
     fn __lasx_xvavg_b(a: __v32i8, b: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvavg.h"]
@@ -147,22 +131,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvavgr_wu(a: __v8u32, b: __v8u32) -> __v8u32;
     #[link_name = "llvm.loongarch.lasx.xvavgr.du"]
     fn __lasx_xvavgr_du(a: __v4u64, b: __v4u64) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvssub.b"]
-    fn __lasx_xvssub_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvssub.h"]
-    fn __lasx_xvssub_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvssub.w"]
-    fn __lasx_xvssub_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvssub.d"]
-    fn __lasx_xvssub_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvssub.bu"]
-    fn __lasx_xvssub_bu(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvssub.hu"]
-    fn __lasx_xvssub_hu(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvssub.wu"]
-    fn __lasx_xvssub_wu(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvssub.du"]
-    fn __lasx_xvssub_du(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvabsd.b"]
     fn __lasx_xvabsd_b(a: __v32i8, b: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvabsd.h"]
@@ -1505,62 +1473,6 @@ pub fn lasx_xvadda_d(a: m256i, b: m256i) -> m256i {
     unsafe { transmute(__lasx_xvadda_d(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_du(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -1673,62 +1585,6 @@ pub fn lasx_xvavgr_du(a: m256i, b: m256i) -> m256i {
     unsafe { transmute(__lasx_xvavgr_du(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_du(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
index 5f306502d9311..147d94fe21abc 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
@@ -132,6 +132,22 @@ impl_vvv!("lasx", lasx_xvbitrev_b, ls::simd_bitrev, m256i, u8x32);
 impl_vvv!("lasx", lasx_xvbitrev_h, ls::simd_bitrev, m256i, u16x16);
 impl_vvv!("lasx", lasx_xvbitrev_w, ls::simd_bitrev, m256i, u32x8);
 impl_vvv!("lasx", lasx_xvbitrev_d, ls::simd_bitrev, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvsadd_b, is::simd_saturating_add, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvsadd_h, is::simd_saturating_add, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvsadd_w, is::simd_saturating_add, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvsadd_d, is::simd_saturating_add, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvsadd_bu, is::simd_saturating_add, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvsadd_hu, is::simd_saturating_add, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvsadd_wu, is::simd_saturating_add, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvsadd_du, is::simd_saturating_add, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvssub_b, is::simd_saturating_sub, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvssub_h, is::simd_saturating_sub, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvssub_w, is::simd_saturating_sub, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvssub_d, is::simd_saturating_sub, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvssub_bu, is::simd_saturating_sub, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvssub_hu, is::simd_saturating_sub, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvssub_wu, is::simd_saturating_sub, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvssub_du, is::simd_saturating_sub, m256i, u64x4);
 
 impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32);
 impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16);
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
index 529dc605c22a7..4f9a022c1de4e 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
@@ -99,22 +99,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vadda_w(a: __v4i32, b: __v4i32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vadda.d"]
     fn __lsx_vadda_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsadd.b"]
-    fn __lsx_vsadd_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vsadd.h"]
-    fn __lsx_vsadd_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vsadd.w"]
-    fn __lsx_vsadd_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vsadd.d"]
-    fn __lsx_vsadd_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsadd.bu"]
-    fn __lsx_vsadd_bu(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vsadd.hu"]
-    fn __lsx_vsadd_hu(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vsadd.wu"]
-    fn __lsx_vsadd_wu(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vsadd.du"]
-    fn __lsx_vsadd_du(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vavg.b"]
     fn __lsx_vavg_b(a: __v16i8, b: __v16i8) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vavg.h"]
@@ -147,22 +131,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vavgr_wu(a: __v4u32, b: __v4u32) -> __v4u32;
     #[link_name = "llvm.loongarch.lsx.vavgr.du"]
     fn __lsx_vavgr_du(a: __v2u64, b: __v2u64) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vssub.b"]
-    fn __lsx_vssub_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vssub.h"]
-    fn __lsx_vssub_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vssub.w"]
-    fn __lsx_vssub_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vssub.d"]
-    fn __lsx_vssub_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vssub.bu"]
-    fn __lsx_vssub_bu(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vssub.hu"]
-    fn __lsx_vssub_hu(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vssub.wu"]
-    fn __lsx_vssub_wu(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vssub.du"]
-    fn __lsx_vssub_du(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vabsd.b"]
     fn __lsx_vabsd_b(a: __v16i8, b: __v16i8) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vabsd.h"]
@@ -1417,62 +1385,6 @@ pub fn lsx_vadda_d(a: m128i, b: m128i) -> m128i {
     unsafe { transmute(__lsx_vadda_d(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_du(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -1585,62 +1497,6 @@ pub fn lsx_vavgr_du(a: m128i, b: m128i) -> m128i {
     unsafe { transmute(__lsx_vavgr_du(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_du(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
index f6b8daae4e00b..e98dcf050f56d 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
@@ -132,6 +132,22 @@ impl_vvv!("lsx", lsx_vbitrev_b, ls::simd_bitrev, m128i, u8x16);
 impl_vvv!("lsx", lsx_vbitrev_h, ls::simd_bitrev, m128i, u16x8);
 impl_vvv!("lsx", lsx_vbitrev_w, ls::simd_bitrev, m128i, u32x4);
 impl_vvv!("lsx", lsx_vbitrev_d, ls::simd_bitrev, m128i, u64x2);
+impl_vvv!("lsx", lsx_vsadd_b, is::simd_saturating_add, m128i, i8x16);
+impl_vvv!("lsx", lsx_vsadd_h, is::simd_saturating_add, m128i, i16x8);
+impl_vvv!("lsx", lsx_vsadd_w, is::simd_saturating_add, m128i, i32x4);
+impl_vvv!("lsx", lsx_vsadd_d, is::simd_saturating_add, m128i, i64x2);
+impl_vvv!("lsx", lsx_vsadd_bu, is::simd_saturating_add, m128i, u8x16);
+impl_vvv!("lsx", lsx_vsadd_hu, is::simd_saturating_add, m128i, u16x8);
+impl_vvv!("lsx", lsx_vsadd_wu, is::simd_saturating_add, m128i, u32x4);
+impl_vvv!("lsx", lsx_vsadd_du, is::simd_saturating_add, m128i, u64x2);
+impl_vvv!("lsx", lsx_vssub_b, is::simd_saturating_sub, m128i, i8x16);
+impl_vvv!("lsx", lsx_vssub_h, is::simd_saturating_sub, m128i, i16x8);
+impl_vvv!("lsx", lsx_vssub_w, is::simd_saturating_sub, m128i, i32x4);
+impl_vvv!("lsx", lsx_vssub_d, is::simd_saturating_sub, m128i, i64x2);
+impl_vvv!("lsx", lsx_vssub_bu, is::simd_saturating_sub, m128i, u8x16);
+impl_vvv!("lsx", lsx_vssub_hu, is::simd_saturating_sub, m128i, u16x8);
+impl_vvv!("lsx", lsx_vssub_wu, is::simd_saturating_sub, m128i, u32x4);
+impl_vvv!("lsx", lsx_vssub_du, is::simd_saturating_sub, m128i, u64x2);
 
 impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16);
 impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8);
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
index 93317fca6b60f..27362d3259f9c 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
+++ b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
@@ -944,41 +944,49 @@ asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvsadd_b
+impl = portable
 name = lasx_xvsadd_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvsadd_h
+impl = portable
 name = lasx_xvsadd_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvsadd_w
+impl = portable
 name = lasx_xvsadd_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvsadd_d
+impl = portable
 name = lasx_xvsadd_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvsadd_bu
+impl = portable
 name = lasx_xvsadd_bu
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvsadd_hu
+impl = portable
 name = lasx_xvsadd_hu
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvsadd_wu
+impl = portable
 name = lasx_xvsadd_wu
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvsadd_du
+impl = portable
 name = lasx_xvsadd_du
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
@@ -1064,41 +1072,49 @@ asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
 
 /// lasx_xvssub_b
+impl = portable
 name = lasx_xvssub_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvssub_h
+impl = portable
 name = lasx_xvssub_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvssub_w
+impl = portable
 name = lasx_xvssub_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvssub_d
+impl = portable
 name = lasx_xvssub_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvssub_bu
+impl = portable
 name = lasx_xvssub_bu
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvssub_hu
+impl = portable
 name = lasx_xvssub_hu
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvssub_wu
+impl = portable
 name = lasx_xvssub_wu
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvssub_du
+impl = portable
 name = lasx_xvssub_du
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
index 274ebe4144e9f..9b1b7cc2457d7 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
+++ b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
@@ -944,41 +944,49 @@ asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vsadd_b
+impl = portable
 name = lsx_vsadd_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vsadd_h
+impl = portable
 name = lsx_vsadd_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vsadd_w
+impl = portable
 name = lsx_vsadd_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vsadd_d
+impl = portable
 name = lsx_vsadd_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vsadd_bu
+impl = portable
 name = lsx_vsadd_bu
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vsadd_hu
+impl = portable
 name = lsx_vsadd_hu
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vsadd_wu
+impl = portable
 name = lsx_vsadd_wu
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vsadd_du
+impl = portable
 name = lsx_vsadd_du
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
@@ -1064,41 +1072,49 @@ asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
 
 /// lsx_vssub_b
+impl = portable
 name = lsx_vssub_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vssub_h
+impl = portable
 name = lsx_vssub_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vssub_w
+impl = portable
 name = lsx_vssub_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vssub_d
+impl = portable
 name = lsx_vssub_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vssub_bu
+impl = portable
 name = lsx_vssub_bu
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vssub_hu
+impl = portable
 name = lsx_vssub_hu
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vssub_wu
+impl = portable
 name = lsx_vssub_wu
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vssub_du
+impl = portable
 name = lsx_vssub_du
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
index f78f594cb9392..423af4cd71a6c 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
+++ b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
@@ -203,6 +203,22 @@ lsx_vbitrev_b
 lsx_vbitrev_h
 lsx_vbitrev_w
 lsx_vbitrev_d
+lsx_vsadd_b
+lsx_vsadd_h
+lsx_vsadd_w
+lsx_vsadd_d
+lsx_vsadd_bu
+lsx_vsadd_hu
+lsx_vsadd_wu
+lsx_vsadd_du
+lsx_vssub_b
+lsx_vssub_h
+lsx_vssub_w
+lsx_vssub_d
+lsx_vssub_bu
+lsx_vssub_hu
+lsx_vssub_wu
+lsx_vssub_du
 
 # LASX intrinsics
 lasx_xvsll_b
@@ -403,3 +419,19 @@ lasx_xvbitrev_b
 lasx_xvbitrev_h
 lasx_xvbitrev_w
 lasx_xvbitrev_d
+lasx_xvsadd_b
+lasx_xvsadd_h
+lasx_xvsadd_w
+lasx_xvsadd_d
+lasx_xvsadd_bu
+lasx_xvsadd_hu
+lasx_xvsadd_wu
+lasx_xvsadd_du
+lasx_xvssub_b
+lasx_xvssub_h
+lasx_xvssub_w
+lasx_xvssub_d
+lasx_xvssub_bu
+lasx_xvssub_hu
+lasx_xvssub_wu
+lasx_xvssub_du

From a023ebf71fbd470b30e25596a532f71c2c761ffa Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 1 May 2026 01:11:01 +0530
Subject: [PATCH 17/78] gen-arm: fix `vmlal`, `vmlsl` and `vmull`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 48 +++++++--------
 .../spec/neon/aarch64.spec.yml                | 60 +++++++++----------
 2 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 4bf10926e4efd..c67c277c7f2fc 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -14520,7 +14520,7 @@ pub fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
+    vmlal_high_s16(a, b, vdupq_lane_s16::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"]
@@ -14535,7 +14535,7 @@ pub fn vmlal_high_laneq_s16<const LANE: i32>(
     c: int16x8_t,
 ) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
+    vmlal_high_s16(a, b, vdupq_laneq_s16::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"]
@@ -14546,7 +14546,7 @@ pub fn vmlal_high_laneq_s16<const LANE: i32>(
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
+    vmlal_high_s32(a, b, vdupq_lane_s32::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"]
@@ -14561,7 +14561,7 @@ pub fn vmlal_high_laneq_s32<const LANE: i32>(
     c: int32x4_t,
 ) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
+    vmlal_high_s32(a, b, vdupq_laneq_s32::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"]
@@ -14576,7 +14576,7 @@ pub fn vmlal_high_lane_u16<const LANE: i32>(
     c: uint16x4_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
+    vmlal_high_u16(a, b, vdupq_lane_u16::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"]
@@ -14591,7 +14591,7 @@ pub fn vmlal_high_laneq_u16<const LANE: i32>(
     c: uint16x8_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
+    vmlal_high_u16(a, b, vdupq_laneq_u16::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"]
@@ -14606,7 +14606,7 @@ pub fn vmlal_high_lane_u32<const LANE: i32>(
     c: uint32x2_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
+    vmlal_high_u32(a, b, vdupq_lane_u32::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"]
@@ -14621,7 +14621,7 @@ pub fn vmlal_high_laneq_u32<const LANE: i32>(
     c: uint32x4_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
+    vmlal_high_u32(a, b, vdupq_laneq_u32::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"]
@@ -14764,7 +14764,7 @@ pub fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
+    vmlsl_high_s16(a, b, vdupq_lane_s16::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"]
@@ -14779,7 +14779,7 @@ pub fn vmlsl_high_laneq_s16<const LANE: i32>(
     c: int16x8_t,
 ) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
+    vmlsl_high_s16(a, b, vdupq_laneq_s16::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"]
@@ -14790,7 +14790,7 @@ pub fn vmlsl_high_laneq_s16<const LANE: i32>(
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
+    vmlsl_high_s32(a, b, vdupq_lane_s32::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"]
@@ -14805,7 +14805,7 @@ pub fn vmlsl_high_laneq_s32<const LANE: i32>(
     c: int32x4_t,
 ) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
+    vmlsl_high_s32(a, b, vdupq_laneq_s32::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"]
@@ -14820,7 +14820,7 @@ pub fn vmlsl_high_lane_u16<const LANE: i32>(
     c: uint16x4_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
+    vmlsl_high_u16(a, b, vdupq_lane_u16::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"]
@@ -14835,7 +14835,7 @@ pub fn vmlsl_high_laneq_u16<const LANE: i32>(
     c: uint16x8_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
+    vmlsl_high_u16(a, b, vdupq_laneq_u16::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"]
@@ -14850,7 +14850,7 @@ pub fn vmlsl_high_lane_u32<const LANE: i32>(
     c: uint32x2_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
+    vmlsl_high_u32(a, b, vdupq_lane_u32::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"]
@@ -14865,7 +14865,7 @@ pub fn vmlsl_high_laneq_u32<const LANE: i32>(
     c: uint32x4_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
+    vmlsl_high_u32(a, b, vdupq_laneq_u32::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"]
@@ -15270,7 +15270,7 @@ pub fn vmulh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
+    vmull_high_s16(a, vdupq_lane_s16::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"]
@@ -15281,7 +15281,7 @@ pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
+    vmull_high_s16(a, vdupq_laneq_s16::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"]
@@ -15292,7 +15292,7 @@ pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int3
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
+    vmull_high_s32(a, vdupq_lane_s32::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"]
@@ -15303,7 +15303,7 @@ pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
+    vmull_high_s32(a, vdupq_laneq_s32::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"]
@@ -15314,7 +15314,7 @@ pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int6
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
+    vmull_high_u16(a, vdupq_lane_u16::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"]
@@ -15325,7 +15325,7 @@ pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uin
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
+    vmull_high_u16(a, vdupq_laneq_u16::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"]
@@ -15336,7 +15336,7 @@ pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> ui
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
+    vmull_high_u32(a, vdupq_lane_u32::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"]
@@ -15347,7 +15347,7 @@ pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uin
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
+    vmull_high_u32(a, vdupq_laneq_u32::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 9c59af133871a..64408a4d92ae1 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -11419,17 +11419,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
-      - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
-      - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
-      - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
+      - [int32x4_t, int16x8_t, int16x4_t, '2']
+      - [int32x4_t, int16x8_t, int16x8_t, '3']
+      - [int64x2_t, int32x4_t, int32x2_t, '1']
+      - [int64x2_t, int32x4_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmlsl_high_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlsl_high_lane{neon_type[2].no}"
     doc: "Multiply-subtract long"
@@ -11442,17 +11442,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]']
-      - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]']
-      - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]']
+      - [uint32x4_t, uint16x8_t, uint16x4_t, '2']
+      - [uint32x4_t, uint16x8_t, uint16x8_t, '3']
+      - [uint64x2_t, uint32x4_t, uint32x2_t, '1']
+      - [uint64x2_t, uint32x4_t, uint32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmlsl_high_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vclt{neon_type[0].no}"
     doc: "Floating-point compare less than"
@@ -11830,16 +11830,16 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32; 8]']
-      - [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32; 8]']
-      - [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32; 4]']
-      - [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32; 4]']
+      - [int16x8_t, int16x4_t, int32x4_t, '2']
+      - [int16x8_t, int16x8_t, int32x4_t, '3']
+      - [int32x4_t, int32x2_t, int64x2_t, '1']
+      - [int32x4_t, int32x4_t, int64x2_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmull_high_{neon_type[0]}"
           - - a
-            - FnCall: [simd_shuffle!, [b, b, '{type[4]}']]
+            - FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]]
 
   - name: "vmull_high_lane{neon_type[1].no}"
     doc: "Multiply long"
@@ -11852,16 +11852,16 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32; 8]']
-      - [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32; 8]']
-      - [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32; 4]']
-      - [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32; 4]']
+      - [uint16x8_t, uint16x4_t, uint32x4_t, '2']
+      - [uint16x8_t, uint16x8_t, uint32x4_t, '3']
+      - [uint32x4_t, uint32x2_t, uint64x2_t, '1']
+      - [uint32x4_t, uint32x4_t, uint64x2_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmull_high_{neon_type[0]}"
           - - a
-            - FnCall: [simd_shuffle!, [b, b, '{type[4]}']]
+            - FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]]
 
   - name: "vrsqrte{neon_type.no}"
     doc: "Reciprocal square-root estimate."
@@ -12143,13 +12143,13 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
-      - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
-      - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
-      - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
+      - [int32x4_t, int16x8_t, int16x4_t, '2']
+      - [int32x4_t, int16x8_t, int16x8_t, '3']
+      - [int64x2_t, int32x4_t, int32x2_t, '1']
+      - [int64x2_t, int32x4_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]
+      - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]]
 
   - name: "vmlal_high_lane{neon_type[2].no}"
     doc: "Multiply-add long"
@@ -12162,13 +12162,13 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]']
-      - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]']
-      - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]']
+      - [uint32x4_t, uint16x8_t, uint16x4_t, '2']
+      - [uint32x4_t, uint16x8_t, uint16x8_t, '3']
+      - [uint64x2_t, uint32x4_t, uint32x2_t, '1']
+      - [uint64x2_t, uint32x4_t, uint32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]
+      - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]]
 
   - name: "vrsrad_n_u64"
     doc: "Unsigned rounding shift right and accumulate."

From aa37c1b535693bc1836cf51df1a3632185729e0a Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 1 May 2026 01:59:28 +0530
Subject: [PATCH 18/78] gen-arm: fix `vfmlal` and `vfmlsl`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 32 ++++++------
 .../spec/neon/aarch64.spec.yml                | 51 +++++++++----------
 2 files changed, 40 insertions(+), 43 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index c67c277c7f2fc..5a0bbfa09552b 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -10756,7 +10756,7 @@ pub fn vfmlal_lane_high_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlal_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlal_high_f16(r, a, vdup_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_laneq_high_f16)"]
@@ -10773,7 +10773,7 @@ pub fn vfmlal_laneq_high_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlal_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlal_high_f16(r, a, vdup_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_lane_high_f16)"]
@@ -10790,7 +10790,7 @@ pub fn vfmlalq_lane_high_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlalq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlalq_high_f16(r, a, vdupq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_laneq_high_f16)"]
@@ -10807,7 +10807,7 @@ pub fn vfmlalq_laneq_high_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlalq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlalq_high_f16(r, a, vdupq_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_lane_low_f16)"]
@@ -10824,7 +10824,7 @@ pub fn vfmlal_lane_low_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlal_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlal_low_f16(r, a, vdup_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_laneq_low_f16)"]
@@ -10841,7 +10841,7 @@ pub fn vfmlal_laneq_low_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlal_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlal_low_f16(r, a, vdup_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_lane_low_f16)"]
@@ -10858,7 +10858,7 @@ pub fn vfmlalq_lane_low_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlalq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlalq_low_f16(r, a, vdupq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_laneq_low_f16)"]
@@ -10875,7 +10875,7 @@ pub fn vfmlalq_laneq_low_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlalq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlalq_low_f16(r, a, vdupq_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_low_f16)"]
@@ -10964,7 +10964,7 @@ pub fn vfmlsl_lane_high_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlsl_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlsl_high_f16(r, a, vdup_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_laneq_high_f16)"]
@@ -10981,7 +10981,7 @@ pub fn vfmlsl_laneq_high_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlsl_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlsl_high_f16(r, a, vdup_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_lane_high_f16)"]
@@ -10998,7 +10998,7 @@ pub fn vfmlslq_lane_high_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlslq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlslq_high_f16(r, a, vdupq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_laneq_high_f16)"]
@@ -11015,7 +11015,7 @@ pub fn vfmlslq_laneq_high_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlslq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlslq_high_f16(r, a, vdupq_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_lane_low_f16)"]
@@ -11032,7 +11032,7 @@ pub fn vfmlsl_lane_low_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlsl_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlsl_low_f16(r, a, vdup_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_laneq_low_f16)"]
@@ -11049,7 +11049,7 @@ pub fn vfmlsl_laneq_low_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlsl_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlsl_low_f16(r, a, vdup_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_lane_low_f16)"]
@@ -11066,7 +11066,7 @@ pub fn vfmlslq_lane_low_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlslq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlslq_low_f16(r, a, vdupq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_laneq_low_f16)"]
@@ -11083,7 +11083,7 @@ pub fn vfmlslq_laneq_low_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlslq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlslq_low_f16(r, a, vdupq_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_low_f16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 64408a4d92ae1..6950f69731c35 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -13920,7 +13920,7 @@ intrinsics:
               arch: aarch64,arm64ec
 
 
-  - name: "vfmlal{type[3]}{neon_type[1]}"
+  - name: "vfmlal{type[3]}_high_{neon_type[1]}"
     doc: "Floating-point fused Multiply-Add Long to accumulator (by element)."
     arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}",  "b: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
@@ -13934,18 +13934,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2']
-      - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3']
-      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2']
-      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3']
+      - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_high_', '2']
+      - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_high_', '3']
+      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_high_', '2']
+      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_high_', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
       - FnCall:
           - "vfmlal{type[4]}{neon_type[1]}"
           - - r
             - a
-            - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
-
+            - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
 
   - name: "vfmlal{type[2]}{neon_type[1]}"
     doc: "Floating-point fused Multiply-Add Long to accumulator (vector)."
@@ -13969,7 +13968,7 @@ intrinsics:
               arch: aarch64,arm64ec
 
 
-  - name: "vfmlal{type[3]}{neon_type[1]}"
+  - name: "vfmlal{type[3]}_low_{neon_type[1]}"
     doc: "Floating-point fused Multiply-Add Long to accumulator (by element)."
     arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}",  "b: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
@@ -13983,18 +13982,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2']
-      - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3']
-      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2']
-      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3']
+      - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_low_', '2']
+      - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_low_', '3']
+      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_low_', '2']
+      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_low_', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
       - FnCall:
           - "vfmlal{type[4]}{neon_type[1]}"
           - - r
             - a
-            - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
-
+            - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
 
   - name: "vfmlsl{type[2]}{neon_type[1]}"
     doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)."
@@ -14017,7 +14015,7 @@ intrinsics:
             - link: "llvm.aarch64.neon.fmlsl2.{neon_type[0]}.{neon_type[1]}"
               arch: aarch64,arm64ec
 
-  - name: "vfmlsl{type[3]}{neon_type[1]}"
+  - name: "vfmlsl{type[3]}_high_{neon_type[1]}"
     doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)."
     arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}",  "b: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
@@ -14031,18 +14029,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2']
-      - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3']
-      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2']
-      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3']
+      - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_high_', '2']
+      - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_high_', '3']
+      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_high_', '2']
+      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_high_', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
       - FnCall:
           - "vfmlsl{type[4]}{neon_type[1]}"
           - - r
             - a
-            - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
-
+            - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
 
   - name: "vfmlsl{type[2]}{neon_type[1]}"
     doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)."
@@ -14065,7 +14062,7 @@ intrinsics:
             - link: "llvm.aarch64.neon.fmlsl.{neon_type[0]}.{neon_type[1]}"
               arch: aarch64,arm64ec
 
-  - name: "vfmlsl{type[3]}{neon_type[1]}"
+  - name: "vfmlsl{type[3]}_low_{neon_type[1]}"
     doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)."
     arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}",  "b: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
@@ -14079,17 +14076,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2']
-      - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3']
-      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2']
-      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3']
+      - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_low_', '2']
+      - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_low_', '3']
+      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_low_', '2']
+      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_low_', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
       - FnCall:
           - "vfmlsl{type[4]}{neon_type[1]}"
           - - r
             - a
-            - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
+            - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
 
   - name: "vamax{neon_type.no}"
     doc: "Multi-vector floating-point absolute maximum"

From a2e77970c2ead88c5cbeae46943765530ef4f8af Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 8 May 2026 14:50:34 +0530
Subject: [PATCH 19/78] replace uses of `simd_extract` with `vget_lane`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 558 ++++++++----------
 .../src/arm_shared/neon/generated.rs          |  14 +-
 .../spec/neon/aarch64.spec.yml                | 327 +++++-----
 .../spec/neon/arm_shared.spec.yml             |  10 +-
 4 files changed, 423 insertions(+), 486 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 5a0bbfa09552b..2624e2f22d002 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -204,7 +204,7 @@ pub fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fabd))]
 pub fn vabdd_f64(a: f64, b: f64) -> f64 {
-    unsafe { simd_extract!(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_f64::<0>(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point absolute difference"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabds_f32)"]
@@ -213,7 +213,7 @@ pub fn vabdd_f64(a: f64, b: f64) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fabd))]
 pub fn vabds_f32(a: f32, b: f32) -> f32 {
-    unsafe { simd_extract!(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_f32::<0>(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Floating-point absolute difference"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdh_f16)"]
@@ -223,7 +223,7 @@ pub fn vabds_f32(a: f32, b: f32) -> f32 {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fabd))]
 pub fn vabdh_f16(a: f16, b: f16) -> f16 {
-    unsafe { simd_extract!(vabd_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_f16::<0>(vabd_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Signed Absolute difference Long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)"]
@@ -1539,7 +1539,7 @@ pub fn vceqq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vceqd_f64(a: f64, b: f64) -> u64 {
-    unsafe { simd_extract!(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_u64::<0>(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point compare equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqs_f32)"]
@@ -1548,7 +1548,7 @@ pub fn vceqd_f64(a: f64, b: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vceqs_f32(a: f32, b: f32) -> u32 {
-    unsafe { simd_extract!(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_u32::<0>(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Compare bitwise equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqd_s64)"]
@@ -1576,7 +1576,7 @@ pub fn vceqd_u64(a: u64, b: u64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqh_f16(a: f16, b: f16) -> u16 {
-    unsafe { simd_extract!(vceq_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_u16::<0>(vceq_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Floating-point compare bitwise equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f16)"]
@@ -1866,7 +1866,7 @@ pub fn vceqzd_u64(a: u64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqzh_f16(a: f16) -> u16 {
-    unsafe { simd_extract!(vceqz_f16(vdup_n_f16(a)), 0) }
+    vget_lane_u16::<0>(vceqz_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point compare bitwise equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzs_f32)"]
@@ -1875,7 +1875,7 @@ pub fn vceqzh_f16(a: f16) -> u16 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vceqzs_f32(a: f32) -> u32 {
-    unsafe { simd_extract!(vceqz_f32(vdup_n_f32(a)), 0) }
+    vget_lane_u32::<0>(vceqz_f32(vdup_n_f32(a)))
 }
 #[doc = "Floating-point compare bitwise equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzd_f64)"]
@@ -1884,7 +1884,7 @@ pub fn vceqzs_f32(a: f32) -> u32 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vceqzd_f64(a: f64) -> u64 {
-    unsafe { simd_extract!(vceqz_f64(vdup_n_f64(a)), 0) }
+    vget_lane_u64::<0>(vceqz_f64(vdup_n_f64(a)))
 }
 #[doc = "Floating-point compare greater than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f64)"]
@@ -1947,7 +1947,7 @@ pub fn vcgeq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcged_f64(a: f64, b: f64) -> u64 {
-    unsafe { simd_extract!(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_u64::<0>(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point compare greater than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcges_f32)"]
@@ -1956,7 +1956,7 @@ pub fn vcged_f64(a: f64, b: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcges_f32(a: f32, b: f32) -> u32 {
-    unsafe { simd_extract!(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_u32::<0>(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Compare greater than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcged_s64)"]
@@ -1984,7 +1984,7 @@ pub fn vcged_u64(a: u64, b: u64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgeh_f16(a: f16, b: f16) -> u16 {
-    unsafe { simd_extract!(vcge_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_u16::<0>(vcge_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Floating-point compare greater than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f32)"]
@@ -2113,7 +2113,7 @@ pub fn vcgezq_s64(a: int64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgezd_f64(a: f64) -> u64 {
-    unsafe { simd_extract!(vcgez_f64(vdup_n_f64(a)), 0) }
+    vget_lane_u64::<0>(vcgez_f64(vdup_n_f64(a)))
 }
 #[doc = "Floating-point compare greater than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezs_f32)"]
@@ -2122,7 +2122,7 @@ pub fn vcgezd_f64(a: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgezs_f32(a: f32) -> u32 {
-    unsafe { simd_extract!(vcgez_f32(vdup_n_f32(a)), 0) }
+    vget_lane_u32::<0>(vcgez_f32(vdup_n_f32(a)))
 }
 #[doc = "Compare signed greater than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezd_s64)"]
@@ -2141,7 +2141,7 @@ pub fn vcgezd_s64(a: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgezh_f16(a: f16) -> u16 {
-    unsafe { simd_extract!(vcgez_f16(vdup_n_f16(a)), 0) }
+    vget_lane_u16::<0>(vcgez_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point compare greater than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f64)"]
@@ -2204,7 +2204,7 @@ pub fn vcgtq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgtd_f64(a: f64, b: f64) -> u64 {
-    unsafe { simd_extract!(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_u64::<0>(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point compare greater than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgts_f32)"]
@@ -2213,7 +2213,7 @@ pub fn vcgtd_f64(a: f64, b: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgts_f32(a: f32, b: f32) -> u32 {
-    unsafe { simd_extract!(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_u32::<0>(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Compare greater than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtd_s64)"]
@@ -2241,7 +2241,7 @@ pub fn vcgtd_u64(a: u64, b: u64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgth_f16(a: f16, b: f16) -> u16 {
-    unsafe { simd_extract!(vcgt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_u16::<0>(vcgt_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Floating-point compare greater than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f32)"]
@@ -2370,7 +2370,7 @@ pub fn vcgtzq_s64(a: int64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgtzd_f64(a: f64) -> u64 {
-    unsafe { simd_extract!(vcgtz_f64(vdup_n_f64(a)), 0) }
+    vget_lane_u64::<0>(vcgtz_f64(vdup_n_f64(a)))
 }
 #[doc = "Floating-point compare greater than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzs_f32)"]
@@ -2379,7 +2379,7 @@ pub fn vcgtzd_f64(a: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgtzs_f32(a: f32) -> u32 {
-    unsafe { simd_extract!(vcgtz_f32(vdup_n_f32(a)), 0) }
+    vget_lane_u32::<0>(vcgtz_f32(vdup_n_f32(a)))
 }
 #[doc = "Compare signed greater than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzd_s64)"]
@@ -2398,7 +2398,7 @@ pub fn vcgtzd_s64(a: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgtzh_f16(a: f16) -> u16 {
-    unsafe { simd_extract!(vcgtz_f16(vdup_n_f16(a)), 0) }
+    vget_lane_u16::<0>(vcgtz_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point compare less than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f64)"]
@@ -2461,7 +2461,7 @@ pub fn vcleq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcled_f64(a: f64, b: f64) -> u64 {
-    unsafe { simd_extract!(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_u64::<0>(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point compare less than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcles_f32)"]
@@ -2470,7 +2470,7 @@ pub fn vcled_f64(a: f64, b: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcles_f32(a: f32, b: f32) -> u32 {
-    unsafe { simd_extract!(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_u32::<0>(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Compare less than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcled_u64)"]
@@ -2498,7 +2498,7 @@ pub fn vcled_s64(a: i64, b: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcleh_f16(a: f16, b: f16) -> u16 {
-    unsafe { simd_extract!(vcle_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_u16::<0>(vcle_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Floating-point compare less than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f32)"]
@@ -2627,7 +2627,7 @@ pub fn vclezq_s64(a: int64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vclezd_f64(a: f64) -> u64 {
-    unsafe { simd_extract!(vclez_f64(vdup_n_f64(a)), 0) }
+    vget_lane_u64::<0>(vclez_f64(vdup_n_f64(a)))
 }
 #[doc = "Floating-point compare less than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezs_f32)"]
@@ -2636,7 +2636,7 @@ pub fn vclezd_f64(a: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vclezs_f32(a: f32) -> u32 {
-    unsafe { simd_extract!(vclez_f32(vdup_n_f32(a)), 0) }
+    vget_lane_u32::<0>(vclez_f32(vdup_n_f32(a)))
 }
 #[doc = "Compare less than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezd_s64)"]
@@ -2655,7 +2655,7 @@ pub fn vclezd_s64(a: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vclezh_f16(a: f16) -> u16 {
-    unsafe { simd_extract!(vclez_f16(vdup_n_f16(a)), 0) }
+    vget_lane_u16::<0>(vclez_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point compare less than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f64)"]
@@ -2737,7 +2737,7 @@ pub fn vcltd_s64(a: i64, b: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vclth_f16(a: f16, b: f16) -> u16 {
-    unsafe { simd_extract!(vclt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_u16::<0>(vclt_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Floating-point compare less than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclts_f32)"]
@@ -2746,7 +2746,7 @@ pub fn vclth_f16(a: f16, b: f16) -> u16 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vclts_f32(a: f32, b: f32) -> u32 {
-    unsafe { simd_extract!(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_u32::<0>(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Floating-point compare less than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltd_f64)"]
@@ -2755,7 +2755,7 @@ pub fn vclts_f32(a: f32, b: f32) -> u32 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcltd_f64(a: f64, b: f64) -> u64 {
-    unsafe { simd_extract!(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_u64::<0>(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point compare less than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_f32)"]
@@ -2884,7 +2884,7 @@ pub fn vcltzq_s64(a: int64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcltzd_f64(a: f64) -> u64 {
-    unsafe { simd_extract!(vcltz_f64(vdup_n_f64(a)), 0) }
+    vget_lane_u64::<0>(vcltz_f64(vdup_n_f64(a)))
 }
 #[doc = "Floating-point compare less than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzs_f32)"]
@@ -2893,7 +2893,7 @@ pub fn vcltzd_f64(a: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcltzs_f32(a: f32) -> u32 {
-    unsafe { simd_extract!(vcltz_f32(vdup_n_f32(a)), 0) }
+    vget_lane_u32::<0>(vcltz_f32(vdup_n_f32(a)))
 }
 #[doc = "Compare less than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzd_s64)"]
@@ -2912,7 +2912,7 @@ pub fn vcltzd_s64(a: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcltzh_f16(a: f16) -> u16 {
-    unsafe { simd_extract!(vcltz_f16(vdup_n_f16(a)), 0) }
+    vget_lane_u16::<0>(vcltz_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f16)"]
@@ -4662,7 +4662,7 @@ pub fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
 ) -> float64x1_t {
     static_assert!(LANE1 == 0);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<f64, _>(simd_extract!(b, LANE2 as u32)) }
+    unsafe { transmute(vgetq_lane_f64::<LANE2>(b)) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s64)"]
@@ -4677,7 +4677,7 @@ pub fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
 ) -> int64x1_t {
     static_assert!(LANE1 == 0);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<i64, _>(simd_extract!(b, LANE2 as u32)) }
+    unsafe { transmute(vgetq_lane_s64::<LANE2>(b)) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u64)"]
@@ -4692,7 +4692,7 @@ pub fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
 ) -> uint64x1_t {
     static_assert!(LANE1 == 0);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
+    unsafe { transmute(vgetq_lane_u64::<LANE2>(b)) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p64)"]
@@ -4707,7 +4707,7 @@ pub fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
 ) -> poly64x1_t {
     static_assert!(LANE1 == 0);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
+    unsafe { transmute(vgetq_lane_p64::<LANE2>(b)) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f32)"]
@@ -9763,7 +9763,7 @@ pub fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
 #[cfg_attr(test, assert_instr(fcvtxn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvtxd_f32_f64(a: f64) -> f32 {
-    unsafe { simd_extract!(vcvtx_f32_f64(vdupq_n_f64(a)), 0) }
+    vget_lane_f32::<0>(vcvtx_f32_f64(vdupq_n_f64(a)))
 }
 #[doc = "Divide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f16)"]
@@ -9862,7 +9862,7 @@ pub fn vdup_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdup_laneq_f64<const N: i32>(a: float64x2_t) -> float64x1_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { transmute::<f64, _>(simd_extract!(a, N as u32)) }
+    unsafe { transmute(vgetq_lane_f64::<N>(a)) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p64)"]
@@ -9873,7 +9873,7 @@ pub fn vdup_laneq_f64<const N: i32>(a: float64x2_t) -> float64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdup_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x1_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { transmute::<u64, _>(simd_extract!(a, N as u32)) }
+    unsafe { transmute(vgetq_lane_p64::<N>(a)) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_s8)"]
@@ -9884,7 +9884,7 @@ pub fn vdup_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_lane_s8<const N: i32>(a: int8x8_t) -> i8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_s8::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_s16)"]
@@ -9895,7 +9895,7 @@ pub fn vdupb_lane_s8<const N: i32>(a: int8x8_t) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_laneq_s16<const N: i32>(a: int16x8_t) -> i16 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_s16::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_u8)"]
@@ -9906,7 +9906,7 @@ pub fn vduph_laneq_s16<const N: i32>(a: int16x8_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_lane_u8<const N: i32>(a: uint8x8_t) -> u8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_u8::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_u16)"]
@@ -9917,7 +9917,7 @@ pub fn vdupb_lane_u8<const N: i32>(a: uint8x8_t) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_laneq_u16<const N: i32>(a: uint16x8_t) -> u16 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_u16::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_p8)"]
@@ -9928,7 +9928,7 @@ pub fn vduph_laneq_u16<const N: i32>(a: uint16x8_t) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_lane_p8<const N: i32>(a: poly8x8_t) -> p8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_p8::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_p16)"]
@@ -9939,7 +9939,7 @@ pub fn vdupb_lane_p8<const N: i32>(a: poly8x8_t) -> p8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_laneq_p16<const N: i32>(a: poly16x8_t) -> p16 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_p16::<N>(a)
 }
 #[doc = "Extract an element from a vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_s8)"]
@@ -9950,7 +9950,7 @@ pub fn vduph_laneq_p16<const N: i32>(a: poly16x8_t) -> p16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_laneq_s8<const N: i32>(a: int8x16_t) -> i8 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_s8::<N>(a)
 }
 #[doc = "Extract an element from a vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_u8)"]
@@ -9961,7 +9961,7 @@ pub fn vdupb_laneq_s8<const N: i32>(a: int8x16_t) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_laneq_u8<const N: i32>(a: uint8x16_t) -> u8 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_u8::<N>(a)
 }
 #[doc = "Extract an element from a vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_p8)"]
@@ -9972,7 +9972,7 @@ pub fn vdupb_laneq_u8<const N: i32>(a: uint8x16_t) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_laneq_p8<const N: i32>(a: poly8x16_t) -> p8 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_p8::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_f64)"]
@@ -9983,7 +9983,7 @@ pub fn vdupb_laneq_p8<const N: i32>(a: poly8x16_t) -> p8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_lane_f64<const N: i32>(a: float64x1_t) -> f64 {
     static_assert!(N == 0);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_f64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_s64)"]
@@ -9994,7 +9994,7 @@ pub fn vdupd_lane_f64<const N: i32>(a: float64x1_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_lane_s64<const N: i32>(a: int64x1_t) -> i64 {
     static_assert!(N == 0);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_s64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_u64)"]
@@ -10005,7 +10005,7 @@ pub fn vdupd_lane_s64<const N: i32>(a: int64x1_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 {
     static_assert!(N == 0);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_u64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_f16)"]
@@ -10017,7 +10017,7 @@ pub fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vduph_lane_f16<const N: i32>(a: float16x4_t) -> f16 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_f16::<N>(a)
 }
 #[doc = "Extract an element from a vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_f16)"]
@@ -10029,7 +10029,7 @@ pub fn vduph_lane_f16<const N: i32>(a: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vduph_laneq_f16<const N: i32>(a: float16x8_t) -> f16 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_f16::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"]
@@ -10084,7 +10084,7 @@ pub fn vdupq_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_lane_f32<const N: i32>(a: float32x2_t) -> f32 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_f32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_f64)"]
@@ -10095,7 +10095,7 @@ pub fn vdups_lane_f32<const N: i32>(a: float32x2_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_laneq_f64<const N: i32>(a: float64x2_t) -> f64 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_f64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_s32)"]
@@ -10106,7 +10106,7 @@ pub fn vdupd_laneq_f64<const N: i32>(a: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_lane_s32<const N: i32>(a: int32x2_t) -> i32 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_s32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_s64)"]
@@ -10117,7 +10117,7 @@ pub fn vdups_lane_s32<const N: i32>(a: int32x2_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_laneq_s64<const N: i32>(a: int64x2_t) -> i64 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_s64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_u32)"]
@@ -10128,7 +10128,7 @@ pub fn vdupd_laneq_s64<const N: i32>(a: int64x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_lane_u32<const N: i32>(a: uint32x2_t) -> u32 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_u32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_u64)"]
@@ -10139,7 +10139,7 @@ pub fn vdups_lane_u32<const N: i32>(a: uint32x2_t) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_laneq_u64<const N: i32>(a: uint64x2_t) -> u64 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_u64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_f32)"]
@@ -10150,7 +10150,7 @@ pub fn vdupd_laneq_u64<const N: i32>(a: uint64x2_t) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_laneq_f32<const N: i32>(a: float32x4_t) -> f32 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_f32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_s16)"]
@@ -10161,7 +10161,7 @@ pub fn vdups_laneq_f32<const N: i32>(a: float32x4_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_lane_s16<const N: i32>(a: int16x4_t) -> i16 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_s16::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_s32)"]
@@ -10172,7 +10172,7 @@ pub fn vduph_lane_s16<const N: i32>(a: int16x4_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_laneq_s32<const N: i32>(a: int32x4_t) -> i32 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_s32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_u16)"]
@@ -10183,7 +10183,7 @@ pub fn vdups_laneq_s32<const N: i32>(a: int32x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_lane_u16<const N: i32>(a: uint16x4_t) -> u16 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_u16::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_u32)"]
@@ -10194,7 +10194,7 @@ pub fn vduph_lane_u16<const N: i32>(a: uint16x4_t) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_laneq_u32<const N: i32>(a: uint32x4_t) -> u32 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_u32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_p16)"]
@@ -10205,7 +10205,7 @@ pub fn vdups_laneq_u32<const N: i32>(a: uint32x4_t) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_lane_p16<const N: i32>(a: poly16x4_t) -> p16 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_p16::<N>(a)
 }
 #[doc = "Three-way exclusive OR"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s8)"]
@@ -10392,7 +10392,7 @@ pub fn vfma_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfma_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) }
+    vfma_f16(a, b, vdup_n_f16(vget_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f16)"]
@@ -10408,7 +10408,7 @@ pub fn vfma_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfma_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) }
+    vfma_f16(a, b, vdup_n_f16(vgetq_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f16)"]
@@ -10424,7 +10424,7 @@ pub fn vfmaq_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmaq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) }
+    vfmaq_f16(a, b, vdupq_n_f16(vget_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f16)"]
@@ -10440,7 +10440,7 @@ pub fn vfmaq_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmaq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) }
+    vfmaq_f16(a, b, vdupq_n_f16(vgetq_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f32)"]
@@ -10455,7 +10455,7 @@ pub fn vfma_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
+    vfma_f32(a, b, vdup_n_f32(vget_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f32)"]
@@ -10470,7 +10470,7 @@ pub fn vfma_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
+    vfma_f32(a, b, vdup_n_f32(vgetq_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f32)"]
@@ -10485,7 +10485,7 @@ pub fn vfmaq_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
+    vfmaq_f32(a, b, vdupq_n_f32(vget_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f32)"]
@@ -10500,7 +10500,7 @@ pub fn vfmaq_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
+    vfmaq_f32(a, b, vdupq_n_f32(vgetq_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f64)"]
@@ -10515,7 +10515,7 @@ pub fn vfmaq_laneq_f64<const LANE: i32>(
     c: float64x2_t,
 ) -> float64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
+    vfmaq_f64(a, b, vdupq_n_f64(vgetq_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f64)"]
@@ -10530,7 +10530,7 @@ pub fn vfma_lane_f64<const LANE: i32>(
     c: float64x1_t,
 ) -> float64x1_t {
     static_assert!(LANE == 0);
-    unsafe { vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
+    vfma_f64(a, b, vdup_n_f64(vget_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f64)"]
@@ -10545,7 +10545,7 @@ pub fn vfma_laneq_f64<const LANE: i32>(
     c: float64x2_t,
 ) -> float64x1_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
+    vfma_f64(a, b, vdup_n_f64(vgetq_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused Multiply-Subtract from accumulator."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f16)"]
@@ -10585,10 +10585,8 @@ pub fn vfma_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmad_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: f64 = simd_extract!(c, LANE as u32);
-        fmaf64(b, c, a)
-    }
+    let c: f64 = vget_lane_f64::<LANE>(c);
+    fmaf64(b, c, a)
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_f16)"]
@@ -10610,10 +10608,8 @@ pub fn vfmah_f16(a: f16, b: f16, c: f16) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmah_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: f16 = simd_extract!(v, LANE as u32);
-        vfmah_f16(a, b, c)
-    }
+    let c: f16 = vget_lane_f16::<LANE>(v);
+    vfmah_f16(a, b, c)
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_laneq_f16)"]
@@ -10625,10 +10621,8 @@ pub fn vfmah_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmah_laneq_f16<const LANE: i32>(a: f16, b: f16, v: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: f16 = simd_extract!(v, LANE as u32);
-        vfmah_f16(a, b, c)
-    }
+    let c: f16 = vgetq_lane_f16::<LANE>(v);
+    vfmah_f16(a, b, c)
 }
 #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f64)"]
@@ -10652,7 +10646,7 @@ pub fn vfmaq_lane_f64<const LANE: i32>(
     c: float64x1_t,
 ) -> float64x2_t {
     static_assert!(LANE == 0);
-    unsafe { vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
+    vfmaq_f64(a, b, vdupq_n_f64(vget_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f64)"]
@@ -10672,10 +10666,8 @@ pub fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmas_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: f32 = simd_extract!(c, LANE as u32);
-        fmaf32(b, c, a)
-    }
+    let c: f32 = vget_lane_f32::<LANE>(c);
+    fmaf32(b, c, a)
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_laneq_f32)"]
@@ -10686,10 +10678,8 @@ pub fn vfmas_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmas_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: f32 = simd_extract!(c, LANE as u32);
-        fmaf32(b, c, a)
-    }
+    let c: f32 = vgetq_lane_f32::<LANE>(c);
+    fmaf32(b, c, a)
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_laneq_f64)"]
@@ -10700,10 +10690,8 @@ pub fn vfmas_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmad_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: f64 = simd_extract!(c, LANE as u32);
-        fmaf64(b, c, a)
-    }
+    let c: f64 = vgetq_lane_f64::<LANE>(c);
+    fmaf64(b, c, a)
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_high_f16)"]
@@ -11147,7 +11135,7 @@ pub fn vfms_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfms_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) }
+    vfms_f16(a, b, vdup_n_f16(vget_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f16)"]
@@ -11163,7 +11151,7 @@ pub fn vfms_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfms_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) }
+    vfms_f16(a, b, vdup_n_f16(vgetq_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f16)"]
@@ -11179,7 +11167,7 @@ pub fn vfmsq_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmsq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) }
+    vfmsq_f16(a, b, vdupq_n_f16(vget_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f16)"]
@@ -11195,7 +11183,7 @@ pub fn vfmsq_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmsq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) }
+    vfmsq_f16(a, b, vdupq_n_f16(vgetq_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f32)"]
@@ -11210,7 +11198,7 @@ pub fn vfms_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
+    vfms_f32(a, b, vdup_n_f32(vget_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f32)"]
@@ -11225,7 +11213,7 @@ pub fn vfms_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
+    vfms_f32(a, b, vdup_n_f32(vgetq_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f32)"]
@@ -11240,7 +11228,7 @@ pub fn vfmsq_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
+    vfmsq_f32(a, b, vdupq_n_f32(vget_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f32)"]
@@ -11255,7 +11243,7 @@ pub fn vfmsq_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
+    vfmsq_f32(a, b, vdupq_n_f32(vgetq_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f64)"]
@@ -11270,7 +11258,7 @@ pub fn vfmsq_laneq_f64<const LANE: i32>(
     c: float64x2_t,
 ) -> float64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
+    vfmsq_f64(a, b, vdupq_n_f64(vgetq_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f64)"]
@@ -11285,7 +11273,7 @@ pub fn vfms_lane_f64<const LANE: i32>(
     c: float64x1_t,
 ) -> float64x1_t {
     static_assert!(LANE == 0);
-    unsafe { vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
+    vfms_f64(a, b, vdup_n_f64(vget_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f64)"]
@@ -11300,7 +11288,7 @@ pub fn vfms_laneq_f64<const LANE: i32>(
     c: float64x2_t,
 ) -> float64x1_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
+    vfms_f64(a, b, vdup_n_f64(vgetq_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused Multiply-Subtract from accumulator."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f16)"]
@@ -11351,10 +11339,8 @@ pub fn vfmsh_f16(a: f16, b: f16, c: f16) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsh_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: f16 = simd_extract!(v, LANE as u32);
-        vfmsh_f16(a, b, c)
-    }
+    let c: f16 = vget_lane_f16::<LANE>(v);
+    vfmsh_f16(a, b, c)
 }
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_laneq_f16)"]
@@ -11366,10 +11352,8 @@ pub fn vfmsh_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsh_laneq_f16<const LANE: i32>(a: f16, b: f16, v: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: f16 = simd_extract!(v, LANE as u32);
-        vfmsh_f16(a, b, c)
-    }
+    let c: f16 = vgetq_lane_f16::<LANE>(v);
+    vfmsh_f16(a, b, c)
 }
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f64)"]
@@ -11396,7 +11380,7 @@ pub fn vfmsq_lane_f64<const LANE: i32>(
     c: float64x1_t,
 ) -> float64x2_t {
     static_assert!(LANE == 0);
-    unsafe { vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
+    vfmsq_f64(a, b, vdupq_n_f64(vget_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f64)"]
@@ -15152,7 +15136,7 @@ pub fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     static_assert!(LANE == 0);
-    unsafe { simd_mul(a, transmute::<f64, _>(simd_extract!(b, LANE as u32))) }
+    unsafe { simd_mul(a, transmute::<f64, _>(vget_lane_f64::<LANE>(b))) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f16)"]
@@ -15187,7 +15171,7 @@ pub fn vmulq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmul_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, transmute::<f64, _>(simd_extract!(b, LANE as u32))) }
+    unsafe { simd_mul(a, transmute::<f64, _>(vgetq_lane_f64::<LANE>(b))) }
 }
 #[doc = "Vector multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f64)"]
@@ -15216,10 +15200,8 @@ pub fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmuld_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
     static_assert!(LANE == 0);
-    unsafe {
-        let b: f64 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f64 = vget_lane_f64::<LANE>(b);
+    a * b
 }
 #[doc = "Add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_f16)"]
@@ -15241,10 +15223,8 @@ pub fn vmulh_f16(a: f16, b: f16) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: f16 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f16 = vget_lane_f16::<LANE>(b);
+    a * b
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_laneq_f16)"]
@@ -15256,10 +15236,8 @@ pub fn vmulh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let b: f16 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f16 = vgetq_lane_f16::<LANE>(b);
+    a * b
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s16)"]
@@ -15392,7 +15370,7 @@ pub fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(pmull2))]
 pub fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
-    unsafe { vmull_p64(simd_extract!(a, 1), simd_extract!(b, 1)) }
+    vmull_p64(vgetq_lane_p64::<1>(a), vgetq_lane_p64::<1>(b))
 }
 #[doc = "Polynomial multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p8)"]
@@ -15532,10 +15510,8 @@ pub fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmuls_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let b: f32 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f32 = vget_lane_f32::<LANE>(b);
+    a * b
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_laneq_f32)"]
@@ -15546,10 +15522,8 @@ pub fn vmuls_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmuls_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: f32 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f32 = vgetq_lane_f32::<LANE>(b);
+    a * b
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_laneq_f64)"]
@@ -15560,10 +15534,8 @@ pub fn vmuls_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmuld_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let b: f64 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f64 = vgetq_lane_f64::<LANE>(b);
+    a * b
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f16)"]
@@ -15775,7 +15747,7 @@ pub fn vmulxq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> floa
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulx_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     static_assert!(LANE == 0);
-    unsafe { vmulx_f64(a, transmute::<f64, _>(simd_extract!(b, LANE as u32))) }
+    unsafe { vmulx_f64(a, transmute(vget_lane_f64::<LANE>(b))) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f64)"]
@@ -15786,7 +15758,7 @@ pub fn vmulx_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float6
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulx_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulx_f64(a, transmute::<f64, _>(simd_extract!(b, LANE as u32))) }
+    unsafe { vmulx_f64(a, transmute(vgetq_lane_f64::<LANE>(b))) }
 }
 #[doc = "Vector multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_n_f16)"]
@@ -15849,7 +15821,7 @@ pub fn vmulxs_f32(a: f32, b: f32) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxd_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
     static_assert!(LANE == 0);
-    unsafe { vmulxd_f64(a, simd_extract!(b, LANE as u32)) }
+    vmulxd_f64(a, vget_lane_f64::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_laneq_f64)"]
@@ -15860,7 +15832,7 @@ pub fn vmulxd_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxd_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulxd_f64(a, simd_extract!(b, LANE as u32)) }
+    vmulxd_f64(a, vgetq_lane_f64::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_lane_f32)"]
@@ -15871,7 +15843,7 @@ pub fn vmulxd_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxs_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulxs_f32(a, simd_extract!(b, LANE as u32)) }
+    vmulxs_f32(a, vget_lane_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_laneq_f32)"]
@@ -15882,7 +15854,7 @@ pub fn vmulxs_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxs_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmulxs_f32(a, simd_extract!(b, LANE as u32)) }
+    vmulxs_f32(a, vgetq_lane_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_f16)"]
@@ -15911,7 +15883,7 @@ pub fn vmulxh_f16(a: f16, b: f16) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmulxh_f16(a, simd_extract!(b, LANE as u32)) }
+    vmulxh_f16(a, vget_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_laneq_f16)"]
@@ -15923,7 +15895,7 @@ pub fn vmulxh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vmulxh_f16(a, simd_extract!(b, LANE as u32)) }
+    vmulxh_f16(a, vgetq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f64)"]
@@ -15998,11 +15970,9 @@ pub fn vnegh_f16(a: f16) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vpaddd_f64(a: float64x2_t) -> f64 {
-    unsafe {
-        let a1: f64 = simd_extract!(a, 0);
-        let a2: f64 = simd_extract!(a, 1);
-        a1 + a2
-    }
+    let a1: f64 = vgetq_lane_f64::<0>(a);
+    let a2: f64 = vgetq_lane_f64::<1>(a);
+    a1 + a2
 }
 #[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadds_f32)"]
@@ -16011,11 +15981,9 @@ pub fn vpaddd_f64(a: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vpadds_f32(a: float32x2_t) -> f32 {
-    unsafe {
-        let a1: f32 = simd_extract!(a, 0);
-        let a2: f32 = simd_extract!(a, 1);
-        a1 + a2
-    }
+    let a1: f32 = vget_lane_f32::<0>(a);
+    let a2: f32 = vget_lane_f32::<1>(a);
+    a1 + a2
 }
 #[doc = "Add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
@@ -16834,7 +16802,7 @@ pub fn vqabsq_s64(a: int64x2_t) -> int64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
 pub fn vqabsb_s8(a: i8) -> i8 {
-    unsafe { simd_extract!(vqabs_s8(vdup_n_s8(a)), 0) }
+    vget_lane_s8::<0>(vqabs_s8(vdup_n_s8(a)))
 }
 #[doc = "Signed saturating absolute value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsh_s16)"]
@@ -16843,7 +16811,7 @@ pub fn vqabsb_s8(a: i8) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
 pub fn vqabsh_s16(a: i16) -> i16 {
-    unsafe { simd_extract!(vqabs_s16(vdup_n_s16(a)), 0) }
+    vget_lane_s16::<0>(vqabs_s16(vdup_n_s16(a)))
 }
 #[doc = "Signed saturating absolute value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabss_s32)"]
@@ -16886,7 +16854,7 @@ pub fn vqabsd_s64(a: i64) -> i64 {
 pub fn vqaddb_s8(a: i8, b: i8) -> i8 {
     let a: int8x8_t = vdup_n_s8(a);
     let b: int8x8_t = vdup_n_s8(b);
-    unsafe { simd_extract!(vqadd_s8(a, b), 0) }
+    vget_lane_s8::<0>(vqadd_s8(a, b))
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_s16)"]
@@ -16897,7 +16865,7 @@ pub fn vqaddb_s8(a: i8, b: i8) -> i8 {
 pub fn vqaddh_s16(a: i16, b: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqadd_s16(a, b), 0) }
+    vget_lane_s16::<0>(vqadd_s16(a, b))
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_u8)"]
@@ -16908,7 +16876,7 @@ pub fn vqaddh_s16(a: i16, b: i16) -> i16 {
 pub fn vqaddb_u8(a: u8, b: u8) -> u8 {
     let a: uint8x8_t = vdup_n_u8(a);
     let b: uint8x8_t = vdup_n_u8(b);
-    unsafe { simd_extract!(vqadd_u8(a, b), 0) }
+    vget_lane_u8::<0>(vqadd_u8(a, b))
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_u16)"]
@@ -16919,7 +16887,7 @@ pub fn vqaddb_u8(a: u8, b: u8) -> u8 {
 pub fn vqaddh_u16(a: u16, b: u16) -> u16 {
     let a: uint16x4_t = vdup_n_u16(a);
     let b: uint16x4_t = vdup_n_u16(b);
-    unsafe { simd_extract!(vqadd_u16(a, b), 0) }
+    vget_lane_u16::<0>(vqadd_u16(a, b))
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_s32)"]
@@ -17096,7 +17064,7 @@ pub fn vqdmlal_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t)
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlalh_s16(a, b, vget_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_laneq_s16)"]
@@ -17107,7 +17075,7 @@ pub fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlalh_s16(a, b, vgetq_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_lane_s32)"]
@@ -17118,7 +17086,7 @@ pub fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlals_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlals_s32(a, b, vget_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_laneq_s32)"]
@@ -17129,7 +17097,7 @@ pub fn vqdmlals_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlals_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlals_s32(a, b, vgetq_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_s16)"]
@@ -17139,7 +17107,7 @@ pub fn vqdmlals_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 {
     let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
-    unsafe { vqadds_s32(a, simd_extract!(x, 0)) }
+    vqadds_s32(a, vgetq_lane_s32::<0>(x))
 }
 #[doc = "Signed saturating doubling multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_s32)"]
@@ -17262,7 +17230,7 @@ pub fn vqdmlsl_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t)
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlslh_s16(a, b, vget_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_laneq_s16)"]
@@ -17273,7 +17241,7 @@ pub fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlslh_s16(a, b, vgetq_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_lane_s32)"]
@@ -17284,7 +17252,7 @@ pub fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlsls_s32(a, b, vget_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_laneq_s32)"]
@@ -17295,7 +17263,7 @@ pub fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlsls_s32(a, b, vgetq_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_s16)"]
@@ -17305,7 +17273,7 @@ pub fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 {
     let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
-    unsafe { vqsubs_s32(a, simd_extract!(x, 0)) }
+    vqsubs_s32(a, vgetq_lane_s32::<0>(x))
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_s32)"]
@@ -17326,7 +17294,7 @@ pub fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32))) }
+    vqdmulh_s16(a, vdup_n_s16(vget_lane_s16::<LANE>(b)))
 }
 #[doc = "Vector saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s16)"]
@@ -17337,7 +17305,7 @@ pub fn vqdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32))) }
+    vqdmulhq_s16(a, vdupq_n_s16(vget_lane_s16::<LANE>(b)))
 }
 #[doc = "Vector saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s32)"]
@@ -17348,7 +17316,7 @@ pub fn vqdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32))) }
+    vqdmulh_s32(a, vdup_n_s32(vget_lane_s32::<LANE>(b)))
 }
 #[doc = "Vector saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s32)"]
@@ -17359,7 +17327,7 @@ pub fn vqdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) }
+    vqdmulhq_s32(a, vdupq_n_s32(vget_lane_s32::<LANE>(b)))
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_lane_s16)"]
@@ -17370,10 +17338,8 @@ pub fn vqdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: i16 = simd_extract!(b, N as u32);
-        vqdmulhh_s16(a, b)
-    }
+    let b: i16 = vget_lane_s16::<N>(b);
+    vqdmulhh_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_laneq_s16)"]
@@ -17384,10 +17350,8 @@ pub fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        let b: i16 = simd_extract!(b, N as u32);
-        vqdmulhh_s16(a, b)
-    }
+    let b: i16 = vgetq_lane_s16::<N>(b);
+    vqdmulhh_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_s16)"]
@@ -17398,7 +17362,7 @@ pub fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
 pub fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqdmulh_s16(a, b), 0) }
+    vget_lane_s16::<0>(vqdmulh_s16(a, b))
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_s32)"]
@@ -17409,7 +17373,7 @@ pub fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
 pub fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
     let a: int32x2_t = vdup_n_s32(a);
     let b: int32x2_t = vdup_n_s32(b);
-    unsafe { simd_extract!(vqdmulh_s32(a, b), 0) }
+    vget_lane_s32::<0>(vqdmulh_s32(a, b))
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_lane_s32)"]
@@ -17420,10 +17384,8 @@ pub fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        let b: i32 = simd_extract!(b, N as u32);
-        vqdmulhs_s32(a, b)
-    }
+    let b: i32 = vget_lane_s32::<N>(b);
+    vqdmulhs_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_laneq_s32)"]
@@ -17434,10 +17396,8 @@ pub fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: i32 = simd_extract!(b, N as u32);
-        vqdmulhs_s32(a, b)
-    }
+    let b: i32 = vgetq_lane_s32::<N>(b);
+    vqdmulhs_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s16)"]
@@ -17588,10 +17548,8 @@ pub fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: i16 = simd_extract!(b, N as u32);
-        vqdmullh_s16(a, b)
-    }
+    let b: i16 = vget_lane_s16::<N>(b);
+    vqdmullh_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_laneq_s32)"]
@@ -17602,10 +17560,8 @@ pub fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: i32 = simd_extract!(b, N as u32);
-        vqdmulls_s32(a, b)
-    }
+    let b: i32 = vgetq_lane_s32::<N>(b);
+    vqdmulls_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_laneq_s16)"]
@@ -17616,10 +17572,8 @@ pub fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmullh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i32 {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        let b: i16 = simd_extract!(b, N as u32);
-        vqdmullh_s16(a, b)
-    }
+    let b: i16 = vgetq_lane_s16::<N>(b);
+    vqdmullh_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_s16)"]
@@ -17630,7 +17584,7 @@ pub fn vqdmullh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i32 {
 pub fn vqdmullh_s16(a: i16, b: i16) -> i32 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqdmull_s16(a, b), 0) }
+    vgetq_lane_s32::<0>(vqdmull_s16(a, b))
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_lane_s32)"]
@@ -17641,10 +17595,8 @@ pub fn vqdmullh_s16(a: i16, b: i16) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulls_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i64 {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        let b: i32 = simd_extract!(b, N as u32);
-        vqdmulls_s32(a, b)
-    }
+    let b: i32 = vget_lane_s32::<N>(b);
+    vqdmulls_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_s32)"]
@@ -17767,7 +17719,7 @@ pub fn vqmovnd_u64(a: u64) -> u32 {
 #[cfg_attr(test, assert_instr(sqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovnh_s16(a: i16) -> i8 {
-    unsafe { simd_extract!(vqmovn_s16(vdupq_n_s16(a)), 0) }
+    vget_lane_s8::<0>(vqmovn_s16(vdupq_n_s16(a)))
 }
 #[doc = "Saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_s32)"]
@@ -17776,7 +17728,7 @@ pub fn vqmovnh_s16(a: i16) -> i8 {
 #[cfg_attr(test, assert_instr(sqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovns_s32(a: i32) -> i16 {
-    unsafe { simd_extract!(vqmovn_s32(vdupq_n_s32(a)), 0) }
+    vget_lane_s16::<0>(vqmovn_s32(vdupq_n_s32(a)))
 }
 #[doc = "Saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_u16)"]
@@ -17785,7 +17737,7 @@ pub fn vqmovns_s32(a: i32) -> i16 {
 #[cfg_attr(test, assert_instr(uqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovnh_u16(a: u16) -> u8 {
-    unsafe { simd_extract!(vqmovn_u16(vdupq_n_u16(a)), 0) }
+    vget_lane_u8::<0>(vqmovn_u16(vdupq_n_u16(a)))
 }
 #[doc = "Saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_u32)"]
@@ -17794,7 +17746,7 @@ pub fn vqmovnh_u16(a: u16) -> u8 {
 #[cfg_attr(test, assert_instr(uqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovns_u32(a: u32) -> u16 {
-    unsafe { simd_extract!(vqmovn_u32(vdupq_n_u32(a)), 0) }
+    vget_lane_u16::<0>(vqmovn_u32(vdupq_n_u32(a)))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s16)"]
@@ -17836,7 +17788,7 @@ pub fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
 #[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovunh_s16(a: i16) -> u8 {
-    unsafe { simd_extract!(vqmovun_s16(vdupq_n_s16(a)), 0) }
+    vget_lane_u8::<0>(vqmovun_s16(vdupq_n_s16(a)))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovuns_s32)"]
@@ -17845,7 +17797,7 @@ pub fn vqmovunh_s16(a: i16) -> u8 {
 #[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovuns_s32(a: i32) -> u16 {
-    unsafe { simd_extract!(vqmovun_s32(vdupq_n_s32(a)), 0) }
+    vget_lane_u16::<0>(vqmovun_s32(vdupq_n_s32(a)))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovund_s64)"]
@@ -17854,7 +17806,7 @@ pub fn vqmovuns_s32(a: i32) -> u16 {
 #[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovund_s64(a: i64) -> u32 {
-    unsafe { simd_extract!(vqmovun_s64(vdupq_n_s64(a)), 0) }
+    vget_lane_u32::<0>(vqmovun_s64(vdupq_n_s64(a)))
 }
 #[doc = "Signed saturating negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s64)"]
@@ -17895,7 +17847,7 @@ pub fn vqnegq_s64(a: int64x2_t) -> int64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sqneg))]
 pub fn vqnegb_s8(a: i8) -> i8 {
-    unsafe { simd_extract!(vqneg_s8(vdup_n_s8(a)), 0) }
+    vget_lane_s8::<0>(vqneg_s8(vdup_n_s8(a)))
 }
 #[doc = "Signed saturating negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegh_s16)"]
@@ -17904,7 +17856,7 @@ pub fn vqnegb_s8(a: i8) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sqneg))]
 pub fn vqnegh_s16(a: i16) -> i16 {
-    unsafe { simd_extract!(vqneg_s16(vdup_n_s16(a)), 0) }
+    vget_lane_s16::<0>(vqneg_s16(vdup_n_s16(a)))
 }
 #[doc = "Signed saturating negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegs_s32)"]
@@ -17913,7 +17865,7 @@ pub fn vqnegh_s16(a: i16) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sqneg))]
 pub fn vqnegs_s32(a: i32) -> i32 {
-    unsafe { simd_extract!(vqneg_s32(vdup_n_s32(a)), 0) }
+    vget_lane_s32::<0>(vqneg_s32(vdup_n_s32(a)))
 }
 #[doc = "Signed saturating negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegd_s64)"]
@@ -17922,7 +17874,7 @@ pub fn vqnegs_s32(a: i32) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sqneg))]
 pub fn vqnegd_s64(a: i64) -> i64 {
-    unsafe { simd_extract!(vqneg_s64(vdup_n_s64(a)), 0) }
+    vget_lane_s64::<0>(vqneg_s64(vdup_n_s64(a)))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s16)"]
@@ -18109,7 +18061,7 @@ pub fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlahh_s16(a, b, vget_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_laneq_s16)"]
@@ -18120,7 +18072,7 @@ pub fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlahh_s16(a, b, vgetq_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_lane_s32)"]
@@ -18131,7 +18083,7 @@ pub fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlahs_s32(a, b, vget_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_laneq_s32)"]
@@ -18142,7 +18094,7 @@ pub fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlahs_s32(a, b, vgetq_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_s16)"]
@@ -18154,7 +18106,7 @@ pub fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
     let c: int16x4_t = vdup_n_s16(c);
-    unsafe { simd_extract!(vqrdmlah_s16(a, b, c), 0) }
+    vget_lane_s16::<0>(vqrdmlah_s16(a, b, c))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_s32)"]
@@ -18166,7 +18118,7 @@ pub fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
     let a: int32x2_t = vdup_n_s32(a);
     let b: int32x2_t = vdup_n_s32(b);
     let c: int32x2_t = vdup_n_s32(c);
-    unsafe { simd_extract!(vqrdmlah_s32(a, b, c), 0) }
+    vget_lane_s32::<0>(vqrdmlah_s32(a, b, c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s16)"]
@@ -18353,7 +18305,7 @@ pub fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlshh_s16(a, b, vget_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_laneq_s16)"]
@@ -18364,7 +18316,7 @@ pub fn vqrdmlshh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlshh_s16(a, b, vgetq_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_lane_s32)"]
@@ -18375,7 +18327,7 @@ pub fn vqrdmlshh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlshs_s32(a, b, vget_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_laneq_s32)"]
@@ -18386,7 +18338,7 @@ pub fn vqrdmlshs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlshs_s32(a, b, vgetq_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_s16)"]
@@ -18398,7 +18350,7 @@ pub fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
     let c: int16x4_t = vdup_n_s16(c);
-    unsafe { simd_extract!(vqrdmlsh_s16(a, b, c), 0) }
+    vget_lane_s16::<0>(vqrdmlsh_s16(a, b, c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_s32)"]
@@ -18410,7 +18362,7 @@ pub fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 {
     let a: int32x2_t = vdup_n_s32(a);
     let b: int32x2_t = vdup_n_s32(b);
     let c: int32x2_t = vdup_n_s32(c);
-    unsafe { simd_extract!(vqrdmlsh_s32(a, b, c), 0) }
+    vget_lane_s32::<0>(vqrdmlsh_s32(a, b, c))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_lane_s16)"]
@@ -18421,7 +18373,7 @@ pub fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhh_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> i16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmulhh_s16(a, simd_extract!(b, LANE as u32)) }
+    vqrdmulhh_s16(a, vget_lane_s16::<LANE>(b))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_laneq_s16)"]
@@ -18432,7 +18384,7 @@ pub fn vqrdmulhh_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhh_laneq_s16<const LANE: i32>(a: i16, b: int16x8_t) -> i16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqrdmulhh_s16(a, simd_extract!(b, LANE as u32)) }
+    vqrdmulhh_s16(a, vgetq_lane_s16::<LANE>(b))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_lane_s32)"]
@@ -18443,7 +18395,7 @@ pub fn vqrdmulhh_laneq_s16<const LANE: i32>(a: i16, b: int16x8_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhs_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> i32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqrdmulhs_s32(a, simd_extract!(b, LANE as u32)) }
+    vqrdmulhs_s32(a, vget_lane_s32::<LANE>(b))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_laneq_s32)"]
@@ -18454,7 +18406,7 @@ pub fn vqrdmulhs_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmulhs_s32(a, simd_extract!(b, LANE as u32)) }
+    vqrdmulhs_s32(a, vgetq_lane_s32::<LANE>(b))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_s16)"]
@@ -18463,7 +18415,7 @@ pub fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32 {
 #[cfg_attr(test, assert_instr(sqrdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhh_s16(a: i16, b: i16) -> i16 {
-    unsafe { simd_extract!(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0) }
+    vget_lane_s16::<0>(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_s32)"]
@@ -18472,7 +18424,7 @@ pub fn vqrdmulhh_s16(a: i16, b: i16) -> i16 {
 #[cfg_attr(test, assert_instr(sqrdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhs_s32(a: i32, b: i32) -> i32 {
-    unsafe { simd_extract!(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0) }
+    vget_lane_s32::<0>(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)))
 }
 #[doc = "Signed saturating rounding shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_s8)"]
@@ -18483,7 +18435,7 @@ pub fn vqrdmulhs_s32(a: i32, b: i32) -> i32 {
 pub fn vqrshlb_s8(a: i8, b: i8) -> i8 {
     let a: int8x8_t = vdup_n_s8(a);
     let b: int8x8_t = vdup_n_s8(b);
-    unsafe { simd_extract!(vqrshl_s8(a, b), 0) }
+    vget_lane_s8::<0>(vqrshl_s8(a, b))
 }
 #[doc = "Signed saturating rounding shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_s16)"]
@@ -18494,7 +18446,7 @@ pub fn vqrshlb_s8(a: i8, b: i8) -> i8 {
 pub fn vqrshlh_s16(a: i16, b: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqrshl_s16(a, b), 0) }
+    vget_lane_s16::<0>(vqrshl_s16(a, b))
 }
 #[doc = "Unsigned signed saturating rounding shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_u8)"]
@@ -18505,7 +18457,7 @@ pub fn vqrshlh_s16(a: i16, b: i16) -> i16 {
 pub fn vqrshlb_u8(a: u8, b: i8) -> u8 {
     let a: uint8x8_t = vdup_n_u8(a);
     let b: int8x8_t = vdup_n_s8(b);
-    unsafe { simd_extract!(vqrshl_u8(a, b), 0) }
+    vget_lane_u8::<0>(vqrshl_u8(a, b))
 }
 #[doc = "Unsigned signed saturating rounding shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_u16)"]
@@ -18516,7 +18468,7 @@ pub fn vqrshlb_u8(a: u8, b: i8) -> u8 {
 pub fn vqrshlh_u16(a: u16, b: i16) -> u16 {
     let a: uint16x4_t = vdup_n_u16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqrshl_u16(a, b), 0) }
+    vget_lane_u16::<0>(vqrshl_u16(a, b))
 }
 #[doc = "Signed saturating rounding shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_s64)"]
@@ -18670,7 +18622,7 @@ pub fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x
 pub fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
     static_assert!(N >= 1 && N <= 32);
     let a: uint64x2_t = vdupq_n_u64(a);
-    unsafe { simd_extract!(vqrshrn_n_u64::<N>(a), 0) }
+    vget_lane_u32::<0>(vqrshrn_n_u64::<N>(a))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_u16)"]
@@ -18682,7 +18634,7 @@ pub fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
 pub fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
     static_assert!(N >= 1 && N <= 8);
     let a: uint16x8_t = vdupq_n_u16(a);
-    unsafe { simd_extract!(vqrshrn_n_u16::<N>(a), 0) }
+    vget_lane_u8::<0>(vqrshrn_n_u16::<N>(a))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_u32)"]
@@ -18694,7 +18646,7 @@ pub fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
 pub fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
     static_assert!(N >= 1 && N <= 16);
     let a: uint32x4_t = vdupq_n_u32(a);
-    unsafe { simd_extract!(vqrshrn_n_u32::<N>(a), 0) }
+    vget_lane_u16::<0>(vqrshrn_n_u32::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_s16)"]
@@ -18706,7 +18658,7 @@ pub fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
 pub fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
     static_assert!(N >= 1 && N <= 8);
     let a: int16x8_t = vdupq_n_s16(a);
-    unsafe { simd_extract!(vqrshrn_n_s16::<N>(a), 0) }
+    vget_lane_s8::<0>(vqrshrn_n_s16::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_s32)"]
@@ -18718,7 +18670,7 @@ pub fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
 pub fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
     static_assert!(N >= 1 && N <= 16);
     let a: int32x4_t = vdupq_n_s32(a);
-    unsafe { simd_extract!(vqrshrn_n_s32::<N>(a), 0) }
+    vget_lane_s16::<0>(vqrshrn_n_s32::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_s64)"]
@@ -18730,7 +18682,7 @@ pub fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
 pub fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
     static_assert!(N >= 1 && N <= 32);
     let a: int64x2_t = vdupq_n_s64(a);
-    unsafe { simd_extract!(vqrshrn_n_s64::<N>(a), 0) }
+    vget_lane_s32::<0>(vqrshrn_n_s64::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s16)"]
@@ -18781,7 +18733,7 @@ pub fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x
 pub fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
     static_assert!(N >= 1 && N <= 32);
     let a: int64x2_t = vdupq_n_s64(a);
-    unsafe { simd_extract!(vqrshrun_n_s64::<N>(a), 0) }
+    vget_lane_u32::<0>(vqrshrun_n_s64::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrunh_n_s16)"]
@@ -18793,7 +18745,7 @@ pub fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
 pub fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
     static_assert!(N >= 1 && N <= 8);
     let a: int16x8_t = vdupq_n_s16(a);
-    unsafe { simd_extract!(vqrshrun_n_s16::<N>(a), 0) }
+    vget_lane_u8::<0>(vqrshrun_n_s16::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshruns_n_s32)"]
@@ -18805,7 +18757,7 @@ pub fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
 pub fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
     static_assert!(N >= 1 && N <= 16);
     let a: int32x4_t = vdupq_n_s32(a);
-    unsafe { simd_extract!(vqrshrun_n_s32::<N>(a), 0) }
+    vget_lane_u16::<0>(vqrshrun_n_s32::<N>(a))
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_s8)"]
@@ -18816,7 +18768,7 @@ pub fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(vqshl_n_s8::<N>(vdup_n_s8(a)), 0) }
+    vget_lane_s8::<0>(vqshl_n_s8::<N>(vdup_n_s8(a)))
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_s64)"]
@@ -18827,7 +18779,7 @@ pub fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
     static_assert_uimm_bits!(N, 6);
-    unsafe { simd_extract!(vqshl_n_s64::<N>(vdup_n_s64(a)), 0) }
+    vget_lane_s64::<0>(vqshl_n_s64::<N>(vdup_n_s64(a)))
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_s16)"]
@@ -18838,7 +18790,7 @@ pub fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(vqshl_n_s16::<N>(vdup_n_s16(a)), 0) }
+    vget_lane_s16::<0>(vqshl_n_s16::<N>(vdup_n_s16(a)))
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_s32)"]
@@ -18849,7 +18801,7 @@ pub fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
     static_assert_uimm_bits!(N, 5);
-    unsafe { simd_extract!(vqshl_n_s32::<N>(vdup_n_s32(a)), 0) }
+    vget_lane_s32::<0>(vqshl_n_s32::<N>(vdup_n_s32(a)))
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_u8)"]
@@ -18860,7 +18812,7 @@ pub fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(vqshl_n_u8::<N>(vdup_n_u8(a)), 0) }
+    vget_lane_u8::<0>(vqshl_n_u8::<N>(vdup_n_u8(a)))
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_u64)"]
@@ -18871,7 +18823,7 @@ pub fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
     static_assert_uimm_bits!(N, 6);
-    unsafe { simd_extract!(vqshl_n_u64::<N>(vdup_n_u64(a)), 0) }
+    vget_lane_u64::<0>(vqshl_n_u64::<N>(vdup_n_u64(a)))
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_u16)"]
@@ -18882,7 +18834,7 @@ pub fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(vqshl_n_u16::<N>(vdup_n_u16(a)), 0) }
+    vget_lane_u16::<0>(vqshl_n_u16::<N>(vdup_n_u16(a)))
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_u32)"]
@@ -18893,7 +18845,7 @@ pub fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
     static_assert_uimm_bits!(N, 5);
-    unsafe { simd_extract!(vqshl_n_u32::<N>(vdup_n_u32(a)), 0) }
+    vget_lane_u32::<0>(vqshl_n_u32::<N>(vdup_n_u32(a)))
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_s8)"]
@@ -18903,7 +18855,7 @@ pub fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlb_s8(a: i8, b: i8) -> i8 {
     let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_s8::<0>(c)
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_s16)"]
@@ -18913,7 +18865,7 @@ pub fn vqshlb_s8(a: i8, b: i8) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlh_s16(a: i16, b: i16) -> i16 {
     let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_s16::<0>(c)
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_s32)"]
@@ -18923,7 +18875,7 @@ pub fn vqshlh_s16(a: i16, b: i16) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshls_s32(a: i32, b: i32) -> i32 {
     let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_s32::<0>(c)
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_u8)"]
@@ -18933,7 +18885,7 @@ pub fn vqshls_s32(a: i32, b: i32) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlb_u8(a: u8, b: i8) -> u8 {
     let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_u8::<0>(c)
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_u16)"]
@@ -18943,7 +18895,7 @@ pub fn vqshlb_u8(a: u8, b: i8) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlh_u16(a: u16, b: i16) -> u16 {
     let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_u16::<0>(c)
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_u32)"]
@@ -18953,7 +18905,7 @@ pub fn vqshlh_u16(a: u16, b: i16) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshls_u32(a: u32, b: i32) -> u32 {
     let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_u32::<0>(c)
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_s64)"]
@@ -18996,7 +18948,7 @@ pub fn vqshld_u64(a: u64, b: i64) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(vqshlu_n_s8::<N>(vdup_n_s8(a)), 0) }
+    vget_lane_u8::<0>(vqshlu_n_s8::<N>(vdup_n_s8(a)))
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlud_n_s64)"]
@@ -19007,7 +18959,7 @@ pub fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 {
     static_assert_uimm_bits!(N, 6);
-    unsafe { simd_extract!(vqshlu_n_s64::<N>(vdup_n_s64(a)), 0) }
+    vget_lane_u64::<0>(vqshlu_n_s64::<N>(vdup_n_s64(a)))
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluh_n_s16)"]
@@ -19018,7 +18970,7 @@ pub fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(vqshlu_n_s16::<N>(vdup_n_s16(a)), 0) }
+    vget_lane_u16::<0>(vqshlu_n_s16::<N>(vdup_n_s16(a)))
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlus_n_s32)"]
@@ -19029,7 +18981,7 @@ pub fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 {
     static_assert_uimm_bits!(N, 5);
-    unsafe { simd_extract!(vqshlu_n_s32::<N>(vdup_n_s32(a)), 0) }
+    vget_lane_u32::<0>(vqshlu_n_s32::<N>(vdup_n_s32(a)))
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s16)"]
@@ -19154,7 +19106,7 @@ pub fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 {
     static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_extract!(vqshrn_n_s16::<N>(vdupq_n_s16(a)), 0) }
+    vget_lane_s8::<0>(vqshrn_n_s16::<N>(vdupq_n_s16(a)))
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_s32)"]
@@ -19165,7 +19117,7 @@ pub fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_extract!(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0) }
+    vget_lane_s16::<0>(vqshrn_n_s32::<N>(vdupq_n_s32(a)))
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_u16)"]
@@ -19176,7 +19128,7 @@ pub fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
     static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_extract!(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0) }
+    vget_lane_u8::<0>(vqshrn_n_u16::<N>(vdupq_n_u16(a)))
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_u32)"]
@@ -19187,7 +19139,7 @@ pub fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_extract!(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0) }
+    vget_lane_u16::<0>(vqshrn_n_u32::<N>(vdupq_n_u32(a)))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s16)"]
@@ -19237,7 +19189,7 @@ pub fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_extract!(vqshrun_n_s64::<N>(vdupq_n_s64(a)), 0) }
+    vget_lane_u32::<0>(vqshrun_n_s64::<N>(vdupq_n_s64(a)))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrunh_n_s16)"]
@@ -19248,7 +19200,7 @@ pub fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
     static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_extract!(vqshrun_n_s16::<N>(vdupq_n_s16(a)), 0) }
+    vget_lane_u8::<0>(vqshrun_n_s16::<N>(vdupq_n_s16(a)))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshruns_n_s32)"]
@@ -19259,7 +19211,7 @@ pub fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_extract!(vqshrun_n_s32::<N>(vdupq_n_s32(a)), 0) }
+    vget_lane_u16::<0>(vqshrun_n_s32::<N>(vdupq_n_s32(a)))
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_s8)"]
@@ -19270,7 +19222,7 @@ pub fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
 pub fn vqsubb_s8(a: i8, b: i8) -> i8 {
     let a: int8x8_t = vdup_n_s8(a);
     let b: int8x8_t = vdup_n_s8(b);
-    unsafe { simd_extract!(vqsub_s8(a, b), 0) }
+    vget_lane_s8::<0>(vqsub_s8(a, b))
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_s16)"]
@@ -19281,7 +19233,7 @@ pub fn vqsubb_s8(a: i8, b: i8) -> i8 {
 pub fn vqsubh_s16(a: i16, b: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqsub_s16(a, b), 0) }
+    vget_lane_s16::<0>(vqsub_s16(a, b))
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_u8)"]
@@ -19292,7 +19244,7 @@ pub fn vqsubh_s16(a: i16, b: i16) -> i16 {
 pub fn vqsubb_u8(a: u8, b: u8) -> u8 {
     let a: uint8x8_t = vdup_n_u8(a);
     let b: uint8x8_t = vdup_n_u8(b);
-    unsafe { simd_extract!(vqsub_u8(a, b), 0) }
+    vget_lane_u8::<0>(vqsub_u8(a, b))
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_u16)"]
@@ -19303,7 +19255,7 @@ pub fn vqsubb_u8(a: u8, b: u8) -> u8 {
 pub fn vqsubh_u16(a: u16, b: u16) -> u16 {
     let a: uint16x4_t = vdup_n_u16(a);
     let b: uint16x4_t = vdup_n_u16(b);
-    unsafe { simd_extract!(vqsub_u16(a, b), 0) }
+    vget_lane_u16::<0>(vqsub_u16(a, b))
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_s32)"]
@@ -22050,7 +22002,7 @@ pub fn vrnd32x_f64(a: float64x1_t) -> float64x1_t {
         )]
         fn _vrnd32x_f64(a: f64) -> f64;
     }
-    unsafe { transmute(_vrnd32x_f64(simd_extract!(a, 0))) }
+    unsafe { transmute(_vrnd32x_f64(vget_lane_f64::<0>(a))) }
 }
 #[doc = "Floating-point round to 32-bit integer toward zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"]
@@ -22114,7 +22066,7 @@ pub fn vrnd32z_f64(a: float64x1_t) -> float64x1_t {
         )]
         fn _vrnd32z_f64(a: f64) -> f64;
     }
-    unsafe { transmute(_vrnd32z_f64(simd_extract!(a, 0))) }
+    unsafe { transmute(_vrnd32z_f64(vget_lane_f64::<0>(a))) }
 }
 #[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"]
@@ -22178,7 +22130,7 @@ pub fn vrnd64x_f64(a: float64x1_t) -> float64x1_t {
         )]
         fn _vrnd64x_f64(a: f64) -> f64;
     }
-    unsafe { transmute(_vrnd64x_f64(simd_extract!(a, 0))) }
+    unsafe { transmute(_vrnd64x_f64(vget_lane_f64::<0>(a))) }
 }
 #[doc = "Floating-point round to 64-bit integer toward zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"]
@@ -22242,7 +22194,7 @@ pub fn vrnd64z_f64(a: float64x1_t) -> float64x1_t {
         )]
         fn _vrnd64z_f64(a: f64) -> f64;
     }
-    unsafe { transmute(_vrnd64z_f64(simd_extract!(a, 0))) }
+    unsafe { transmute(_vrnd64z_f64(vget_lane_f64::<0>(a))) }
 }
 #[doc = "Floating-point round to integral, toward zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f16)"]
@@ -24157,7 +24109,7 @@ pub fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(usqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsqaddb_u8(a: u8, b: i8) -> u8 {
-    unsafe { simd_extract!(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0) }
+    vget_lane_u8::<0>(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)))
 }
 #[doc = "Unsigned saturating accumulate of signed value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddh_u16)"]
@@ -24166,7 +24118,7 @@ pub fn vsqaddb_u8(a: u8, b: i8) -> u8 {
 #[cfg_attr(test, assert_instr(usqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsqaddh_u16(a: u16, b: i16) -> u16 {
-    unsafe { simd_extract!(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0) }
+    vget_lane_u16::<0>(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)))
 }
 #[doc = "Unsigned saturating accumulate of signed value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddd_u64)"]
@@ -25845,7 +25797,7 @@ pub unsafe fn vstl1q_lane_p64<const LANE: i32>(ptr: *mut p64, val: poly64x2_t) {
 pub unsafe fn vstl1_lane_s64<const LANE: i32>(ptr: *mut i64, val: int64x1_t) {
     static_assert!(LANE == 0);
     let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64;
-    let lane: i64 = simd_extract!(val, LANE as u32);
+    let lane: i64 = vget_lane_s64::<LANE>(val);
     (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release)
 }
 #[doc = "Store-Release a single-element structure from one lane of one register."]
@@ -25861,7 +25813,7 @@ pub unsafe fn vstl1_lane_s64<const LANE: i32>(ptr: *mut i64, val: int64x1_t) {
 pub unsafe fn vstl1q_lane_s64<const LANE: i32>(ptr: *mut i64, val: int64x2_t) {
     static_assert_uimm_bits!(LANE, 1);
     let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64;
-    let lane: i64 = simd_extract!(val, LANE as u32);
+    let lane: i64 = vgetq_lane_s64::<LANE>(val);
     (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release)
 }
 #[doc = "Subtract"]
@@ -27245,7 +27197,7 @@ pub fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t {
 #[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vuqaddb_s8(a: i8, b: u8) -> i8 {
-    unsafe { simd_extract!(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)), 0) }
+    vget_lane_s8::<0>(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)))
 }
 #[doc = "Signed saturating accumulate of unsigned value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddh_s16)"]
@@ -27254,7 +27206,7 @@ pub fn vuqaddb_s8(a: i8, b: u8) -> i8 {
 #[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vuqaddh_s16(a: i16, b: u16) -> i16 {
-    unsafe { simd_extract!(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)), 0) }
+    vget_lane_s16::<0>(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)))
 }
 #[doc = "Signed saturating accumulate of unsigned value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddd_s64)"]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index f24a12ad40c95..1ae7f3f3b0f2b 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -1659,7 +1659,7 @@ pub fn vabsq_s32(a: int32x4_t) -> int32x4_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vabsh_f16(a: f16) -> f16 {
-    unsafe { simd_extract!(vabs_f16(vdup_n_f16(a)), 0) }
+    vget_lane_f16::<0>(vabs_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point Add (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_f16)"]
@@ -10945,7 +10945,7 @@ pub fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
 )]
 pub fn vdup_laneq_s64<const N: i32>(a: int64x2_t) -> int64x1_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { transmute::<i64, _>(simd_extract!(a, N as u32)) }
+    unsafe { transmute(vgetq_lane_s64::<N>(a)) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"]
@@ -10968,7 +10968,7 @@ pub fn vdup_laneq_s64<const N: i32>(a: int64x2_t) -> int64x1_t {
 )]
 pub fn vdup_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x1_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { transmute::<u64, _>(simd_extract!(a, N as u32)) }
+    unsafe { transmute(vgetq_lane_u64::<N>(a)) }
 }
 #[doc = "Create a new vector with all lanes set to a value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"]
@@ -35281,7 +35281,7 @@ pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
 )]
 pub fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32))) }
+    vqdmulh_s16(a, vdup_n_s16(vgetq_lane_s16::<LANE>(b)))
 }
 #[doc = "Vector saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"]
@@ -35304,7 +35304,7 @@ pub fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4
 )]
 pub fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32))) }
+    vqdmulhq_s16(a, vdupq_n_s16(vgetq_lane_s16::<LANE>(b)))
 }
 #[doc = "Vector saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"]
@@ -35327,7 +35327,7 @@ pub fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x
 )]
 pub fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32))) }
+    vqdmulh_s32(a, vdup_n_s32(vgetq_lane_s32::<LANE>(b)))
 }
 #[doc = "Vector saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"]
@@ -35350,7 +35350,7 @@ pub fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2
 )]
 pub fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) }
+    vqdmulhq_s32(a, vdupq_n_s32(vgetq_lane_s32::<LANE>(b)))
 }
 #[doc = "Vector saturating doubling multiply high with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 6950f69731c35..75e7e88362903 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -182,12 +182,12 @@ intrinsics:
       - ['d_f64', 'f64']
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vabd_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - 0
+          - - 0
 
   - name: "vabd{type[0]}"
     doc: "Floating-point absolute difference"
@@ -203,12 +203,12 @@ intrinsics:
       - ['h_f16', 'f16']
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vabd_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - 0
+          - - 0
 
   - name: "vabdl_high{neon_type[0].noq}"
     doc: Signed Absolute difference Long
@@ -375,12 +375,12 @@ intrinsics:
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vceq_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - 0
 
 
   - name: "vceq{type[0]}"
@@ -397,12 +397,12 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vceq_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - 0
 
   - name: "vceqd_{type[0]}"
     doc: "Compare bitwise equal"
@@ -491,12 +491,12 @@ intrinsics:
       - ["h_s16", "i16", "u16", "s16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[3]}'
           - - FnCall:
                 - "vuqadd_{type[3]}"
                 - - FnCall: ["vdup_n_{type[3]}", [a]]
                   - FnCall: ["vdup_n_{type[2]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vabs{neon_type.no}"
     doc: "Floating-point absolute value"
@@ -567,12 +567,12 @@ intrinsics:
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - 'simd_extract!'
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgt_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
 
   - name: "vcgt{type[0]}"
@@ -589,12 +589,12 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - 'simd_extract!'
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgt_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vclt{neon_type[0].no}"
     doc: "Compare signed less than"
@@ -651,12 +651,12 @@ intrinsics:
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcle_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
 
   - name: "vcle{type[0]}"
@@ -673,12 +673,12 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcle_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vcge{neon_type[0].no}"
     doc: "Compare signed greater than or equal"
@@ -789,11 +789,11 @@ intrinsics:
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vclez_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vclez{type[0]}"
     doc: "Floating-point compare less than or equal to zero"
@@ -809,11 +809,11 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vclez_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcltz{neon_type[0].no}"
     doc: "Compare signed less than zero"
@@ -872,11 +872,11 @@ intrinsics:
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcltz_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcltz{type[0]}"
     doc: "Floating-point compare less than zero"
@@ -892,11 +892,11 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcltz_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcltzd_s64"
     doc: "Compare less than zero"
@@ -1612,11 +1612,11 @@ intrinsics:
       - ["f64", "f32"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - vcvtx_f32_f64
                 - - FnCall: [vdupq_n_f64, [a]]
-            - '0'
+          - - '0'
 
   - name: "vcvtx_high_f32_f64"
     doc: "Floating-point convert to lower precision narrow, rounding to odd"
@@ -2521,7 +2521,7 @@ intrinsics:
       - [float64x1_t, "f64"]
     compose:
       - FnCall: [static_assert!, ['N == 0']]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
   - name: "vdup_laneq_{neon_type[0]}"
     doc: "Set all vector lanes to the same value"
@@ -2539,8 +2539,8 @@ intrinsics:
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
       - FnCall:
-          - "transmute::<{type[2]}, _>"
-          - - FnCall: [simd_extract!, [a, 'N as u32']]
+          - transmute
+          - - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
   - name: "vdup{type[2]}"
     doc: "Set all vector lanes to the same value"
@@ -2561,7 +2561,7 @@ intrinsics:
       - [float64x2_t, "f64", d_laneq_f64]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
   - name: "vdup{type[2]}"
     doc: "Set all vector lanes to the same value"
@@ -2582,7 +2582,7 @@ intrinsics:
       - [poly16x8_t, "p16", h_laneq_p16]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 3]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
 
   - name: "vdup{type[2]}"
@@ -2601,7 +2601,7 @@ intrinsics:
       - [float16x4_t, "f16", h_lane_f16]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 2]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
 
   - name: "vdup{type[2]}"
@@ -2620,7 +2620,7 @@ intrinsics:
       - [float16x8_t, "f16", h_laneq_f16]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 4]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
 
   - name: "vdup{type[2]}"
@@ -2639,7 +2639,7 @@ intrinsics:
       - [poly8x16_t, "p8", b_laneq_p8]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 4]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
   - name: "vdup{type[2]}"
     doc: "Set all vector lanes to the same value"
@@ -2660,7 +2660,7 @@ intrinsics:
       - [float32x4_t, "f32", s_laneq_f32]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 2]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
   - name: "vext{neon_type[0].no}"
     doc: "Extract vector from pair of vectors"
@@ -2877,11 +2877,11 @@ intrinsics:
       - [i64, 'd_s64', 's64']
     compose:
       - FnCall:
-          - 'simd_extract!'
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - 'vqneg_{type[2]}'
                 - - FnCall: ['vdup_n_{type[2]}', [a]]
-            - 0
+          - - 0
 
   - name: "vqneg{neon_type[0].no}"
     doc: Signed saturating negate
@@ -2958,12 +2958,12 @@ intrinsics:
               - "vdup_n_{type[2]}"
               - - b
       - FnCall:
-          - 'simd_extract!'
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqsub_{type[2]}"
                 - - a
                   - b
-            - "0"
+          - - "0"
 
   - name: "vqsub{type[3]}"
     doc: Saturating subtract
@@ -2989,12 +2989,12 @@ intrinsics:
               - "vdup_n_{type[2]}"
               - - b
       - FnCall:
-          - 'simd_extract!'
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqsub_{type[2]}"
                 - - a
                   - b
-            - "0"
+          - - "0"
 
   - name: "vrbit{neon_type.no}"
     doc: Reverse bit order
@@ -3443,12 +3443,12 @@ intrinsics:
               - "vdup_n_{type[0]}"
               - - b
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[0]}'
           - - FnCall:
                 - "vqadd_{type[0]}"
                 - - a
                   - b
-            - "0"
+          - - "0"
 
   - name: "vqadd{type[2]}"
     doc: Saturating add
@@ -3474,12 +3474,12 @@ intrinsics:
               - "vdup_n_{type[0]}"
               - - b
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[0]}'
           - - FnCall:
                 - "vqadd_{type[0]}"
                 - - a
                   - b
-            - "0"
+          - - "0"
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers"
@@ -4484,7 +4484,7 @@ intrinsics:
       - Let:
         - "lane"
         - i64
-        - FnCall: [simd_extract!, [val, 'LANE as u32']]
+        - FnCall: ['vget{neon_type[1].lane_nox}', [val], [LANE]]
       - MethodCall:
         - "(*atomic_dst)"
         - store
@@ -5254,8 +5254,8 @@ intrinsics:
     compose:
       - FnCall:
           - "vmull_{neon_type[0]}"
-          - - FnCall: [simd_extract!, [a, '1']]
-            - FnCall: [simd_extract!, [b, '1']]
+          - - FnCall: ['vget{neon_type[0].lane_nox}', [a], [1]]
+            - FnCall: ['vget{neon_type[0].lane_nox}', [b], [1]]
 
   - name: "vmulx{neon_type.no}"
     doc: Floating-point multiply extended
@@ -5354,11 +5354,8 @@ intrinsics:
           - vmulx_f64
           - - a
             - FnCall:
-                - 'transmute::<f64, _>'
-                - - FnCall:
-                      - "simd_extract!"
-                      - - b
-                        - 'LANE as u32'
+                - 'transmute'
+                - - FnCall: ['vget{neon_type.lane_nox}', [b], [LANE]]
 
   - name: "vmulx{type[0]}"
     doc: Floating-point multiply extended
@@ -5394,16 +5391,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ["d_lane_f64", "f64", float64x1_t, "d_f64", 'LANE as u32']
+      - ["d_lane_f64", "f64", float64x1_t, "d_f64"]
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
       - FnCall:
           - "vmulx{type[3]}"
           - - a
-            - FnCall:
-                - "simd_extract!"
-                - - b
-                  - "{type[4]}"
+            - FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]]
 
   - name: "vmulx_laneq_f64"
     doc: Floating-point multiply extended
@@ -5423,11 +5417,8 @@ intrinsics:
           - vmulx_f64
           - - a
             - FnCall:
-                - 'transmute::<f64, _>'
-                - - FnCall:
-                      - "simd_extract!"
-                      - - b
-                        - 'LANE as u32'
+                - 'transmute'
+                - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
 
   - name: "vmulx{type[0]}"
     doc: Floating-point multiply extended
@@ -5497,18 +5488,15 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ['s_lane_f32', f32, float32x2_t, '1', 's_f32', 'LANE as u32']
-      - ['s_laneq_f32', f32, float32x4_t, '2', 's_f32', 'LANE as u32']
-      - ['d_laneq_f64', f64, float64x2_t, '1', 'd_f64', 'LANE as u32']
+      - ['s_lane_f32', f32, float32x2_t, '1', 's_f32']
+      - ['s_laneq_f32', f32, float32x4_t, '2', 's_f32']
+      - ['d_laneq_f64', f64, float64x2_t, '1', 'd_f64']
     compose:
       - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
       - FnCall:
           - "vmulx{type[4]}"
           - - a
-            - FnCall:
-                - "simd_extract!"
-                - - b
-                  - "{type[5]}"
+            - FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]]
 
 
   - name: "vmulx{type[0]}"
@@ -5531,10 +5519,7 @@ intrinsics:
       - FnCall:
           - "vmulx{type[4]}"
           - - a
-            - FnCall:
-                - "simd_extract!"
-                - - b
-                  - "{type[5]}"
+            - FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]]
 
 
   - name: "vmulx{neon_type[0].N}"
@@ -7084,11 +7069,11 @@ intrinsics:
       - Let:
           - a1
           - "{type[2]}"
-          - FnCall: [simd_extract!, [a, '0']]
+          - FnCall: ['vget{neon_type[1].lane_nox}', [a], [0]]
       - Let:
           - a2
           - "{type[2]}"
-          - FnCall: [simd_extract!, [a, '1']]
+          - FnCall: ['vget{neon_type[1].lane_nox}', [a], [1]]
       - Identifier: ['a1 + a2', Symbol]
 
   - name: "vpmin{type[0]}"
@@ -7121,7 +7106,7 @@ intrinsics:
     compose:
       - Let: [a, int16x4_t, {FnCall: [vdup_n_s16, [a]]}]
       - Let: [b, int16x4_t, {FnCall: [vdup_n_s16, [b]]}]
-      - FnCall: [simd_extract!, [{FnCall: [vqdmull_s16, [a, b]]}, '0']]
+      - FnCall: ['vgetq_lane_{type[1]}', [{FnCall: [vqdmull_s16, [a, b]]}], ['0']]
 
   - name: "vqdmulls_s32"
     doc: "Signed saturating doubling multiply long"
@@ -7187,7 +7172,7 @@ intrinsics:
       - ["i32", int32x4_t, "i64", 's_laneq_s32', 's_s32']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 2]]
-      - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}]
+      - Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ["vqdmull{type[4]}", [a, b]]
 
   - name: "vqdmullh_laneq_s16"
@@ -7204,7 +7189,7 @@ intrinsics:
       - ["i16", int16x8_t, "i32"]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 3]]
-      - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}]
+      - Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ["vqdmullh_s16", [a, b]]
 
   - name: "vqdmulls_lane_s32"
@@ -7221,7 +7206,7 @@ intrinsics:
       - ["i32", int32x2_t, "i64"]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
-      - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}]
+      - Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ["vqdmulls_s32", [a, b]]
 
   - name: "vqdmull{type[6]}"
@@ -7360,7 +7345,7 @@ intrinsics:
       - ["i32", "i16", "s16"]
     compose:
       - Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}]
-      - FnCall: [vqadds_s32, [a, {FnCall: [simd_extract!, [x, 0]]}]]
+      - FnCall: [vqadds_s32, [a, {FnCall: ['vgetq_lane_s32', [x], [0]]}]]
 
   - name: "vqdmlals_s32"
     doc: "Signed saturating doubling multiply-add long"
@@ -7393,7 +7378,7 @@ intrinsics:
       - ["i64", "i32", int32x4_t, "i64", s_laneq_s32, '2', s_s32]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
-      - FnCall: ["vqdmlal{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+      - FnCall: ["vqdmlal{type[6]}", [a, b, {FnCall: ['vget{neon_type[2].lane_nox}', [c], [LANE]]}]]
 
   - name: "vqdmlal_laneq_s16"
     doc: "Vector widening saturating doubling multiply accumulate with scalar"
@@ -7474,7 +7459,7 @@ intrinsics:
       - ["i32", "i16"]
     compose:
       - Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}]
-      - FnCall: [vqsubs_s32, [a, {FnCall: [simd_extract!, [x, '0']]}]]
+      - FnCall: [vqsubs_s32, [a, {FnCall: ['vgetq_lane_s32', [x], [0]]}]]
 
   - name: "vqdmlsls_s32"
     doc: "Signed saturating doubling multiply-subtract long"
@@ -7507,7 +7492,7 @@ intrinsics:
       - ["i64", "i32", int32x4_t, "i64", 's_laneq_s32', '2', 's_s32']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
-      - FnCall: ["vqdmlsl{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+      - FnCall: ["vqdmlsl{type[6]}", [a, b, {FnCall: ['vget{neon_type[2].lane_nox}', [c], [LANE]]}]]
 
   - name: "vqdmlsl_laneq_s16"
     doc: "Vector widening saturating doubling multiply subtract with scalar"
@@ -7555,7 +7540,7 @@ intrinsics:
     compose:
       - Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [a]]}]
       - Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [b]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqdmulh{neon_type[3].no}", [a, b]]}, '0']]
+      - FnCall: ['vget{neon_type[3].lane_nox}', [{FnCall: ["vqdmulh{neon_type[3].no}", [a, b]]}], ['0']]
 
   - name: "vqdmulhh{type[3]}"
     doc: "Signed saturating doubling multiply returning high half"
@@ -7572,7 +7557,7 @@ intrinsics:
       - ["i16", int16x8_t, "i16", '_laneq_s16', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]]
-      - Let: [b, 'i16', {FnCall: [simd_extract!, [b, 'N as u32']]}]
+      - Let: [b, 'i16', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ['vqdmulhh_s16', [a, b]]
 
   - name: "vqdmulhs{type[3]}"
@@ -7590,7 +7575,7 @@ intrinsics:
       - ["i32", int32x4_t, "i32", "_laneq_s32", '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]]
-      - Let: [b, 'i32', {FnCall: [simd_extract!, [b, 'N as u32']]}]
+      - Let: [b, 'i32', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ['vqdmulhs_s32', [a, b]]
 
   - name: "vqmovn_high{neon_type[1].noq}"
@@ -7635,7 +7620,7 @@ intrinsics:
       - ["i16", "i8", 'h_s16', s16]
       - ["i32", "i16", 's_s32', s32]
     compose:
-      - FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']]
+      - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']]
 
   - name: "vqmovn{type[2]}"
     doc: "Saturating extract narrow"
@@ -7649,7 +7634,7 @@ intrinsics:
       - ["u16", "u8", 'h_u16', 'u16']
       - ["u32", "u16", 's_u32', 'u32']
     compose:
-      - FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']]
+      - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']]
 
   - name: "vqmovnd_s64"
     doc: "Saturating extract narrow"
@@ -7698,7 +7683,7 @@ intrinsics:
       - ["i32", "u16", 's_s32', s32]
       - ["i64", "u32", 'd_s64', s64]
     compose:
-      - FnCall: [simd_extract!, [{FnCall: ["vqmovun_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']]
+      - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovun_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']]
 
   - name: "vqmovun_high_{neon_type[1]}"
     doc: "Signed saturating extract unsigned narrow"
@@ -7727,7 +7712,7 @@ intrinsics:
       - ["i16", 'h_s16', 's16']
       - ["i32", 's_s32', 's32']
     compose:
-      - FnCall: [simd_extract!, [{FnCall: ["vqrdmulh_{type[2]}", [{FnCall: ["vdup_n_{type[2]}", [a]]}, {FnCall: ["vdup_n_{type[2]}", [b]]}]]}, '0']]
+      - FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrdmulh_{type[2]}", [{FnCall: ["vdup_n_{type[2]}", [a]]}, {FnCall: ["vdup_n_{type[2]}", [b]]}]]}], ['0']]
 
   - name: "vqrdmulh{type[2]}"
     doc: "Signed saturating rounding doubling multiply returning high half"
@@ -7746,7 +7731,7 @@ intrinsics:
       - ["i32", int32x4_t, 's_laneq_s32', 's_s32', '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]]
-      - FnCall: ["vqrdmulh{type[3]}", [a, {FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
+      - FnCall: ["vqrdmulh{type[3]}", [a, {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]]
 
   - name: "vqrdmlah{neon_type.no}"
     doc: "Signed saturating rounding doubling multiply accumulate returning high half"
@@ -7785,7 +7770,7 @@ intrinsics:
       - Let: [a, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [a]]}]
       - Let: [b, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [b]]}]
       - Let: [c, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [c]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrdmlah_{type[2]}", [a, b, c]]}, '0']]
+      - FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrdmlah_{type[2]}", [a, b, c]]}], ['0']]
 
   - name: "vqrdmlah{type[0]}"
     doc: "Signed saturating rounding doubling multiply accumulate returning high half"
@@ -7830,7 +7815,7 @@ intrinsics:
       - ["i32", int32x4_t, '2', "s_s32", s_laneq_s32, s_s32]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]]
-      - FnCall: ["vqrdmlah{type[5]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+      - FnCall: ["vqrdmlah{type[5]}", [a, b, {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
   - name: "vqrdmlsh{neon_type.no}"
     doc: "Signed saturating rounding doubling multiply subtract returning high half"
@@ -7869,7 +7854,7 @@ intrinsics:
       - Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}]
       - Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}]
       - Let: [c, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [c]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrdmlsh_{type[3]}", [a, b, c]]}, '0']]
+      - FnCall: ['vget{neon_type[2].lane_nox}', [{FnCall: ["vqrdmlsh_{type[3]}", [a, b, c]]}], ['0']]
 
   - name: "vqrdmlsh{type[0]}"
     doc: "Signed saturating rounding doubling multiply subtract returning high half"
@@ -7914,7 +7899,7 @@ intrinsics:
       - ["i32", int32x4_t, '2', s_laneq_s32, s_s32]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]]
-      - FnCall: ["vqrdmlsh{type[4]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+      - FnCall: ["vqrdmlsh{type[4]}", [a, b, {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
   - name: "vqrshl{type[0]}"
     doc: "Signed saturating rounding shift left"
@@ -7948,7 +7933,7 @@ intrinsics:
     compose:
       - Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}]
       - Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[3]}", [a, b]]}, '0']]
+      - FnCall: ['vget{neon_type[2].lane_nox}', [{FnCall: ["vqrshl_{type[3]}", [a, b]]}], ['0']]
 
   - name: "vqrshl{type[2]}"
     doc: "Unsigned signed saturating rounding shift left"
@@ -7982,7 +7967,7 @@ intrinsics:
     compose:
       - Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n_{type[0]}", [a]]}]
       - Let: [b, "{neon_type[4]}", {FnCall: ["vdup_n_{type[5]}", [b]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[0]}", [a, b]]}, '0']]
+      - FnCall: ['vget{neon_type[3].lane_nox}', [{FnCall: ["vqrshl_{type[0]}", [a, b]]}], ['0']]
 
   - name: "vqrshrn{type[2]}"
     doc: "Signed saturating rounded shift right narrow"
@@ -8001,7 +7986,7 @@ intrinsics:
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrshrn_n{neon_type[4].noq}::<N>", [a]]}, '0']]
+      - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqrshrn_n{neon_type[4].noq}::<N>", [a]]}], ['0']]
 
   - name: "vqrshrn{type[3]}"
     doc: "Signed saturating rounded shift right narrow"
@@ -8038,7 +8023,7 @@ intrinsics:
     compose:
       - FnCall: [static_assert!, ['{type[3]}']]
       - Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrshrn{type[6]}::<N>", [a]]}, '0']]
+      - FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrshrn{type[6]}::<N>", [a]]}], ['0']]
 
   - name: "vqrshrn_high_n{neon_type[1].noq}"
     doc: "Unsigned saturating rounded shift right narrow"
@@ -8085,11 +8070,11 @@ intrinsics:
           - "{neon_type[4]}"
           - FnCall: ["vdupq_n_{type[5]}", [a]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqrshrun_n_{type[5]}::<N>"
                 - - a
-            - '0'
+          - - '0'
 
   - name: "vqrshrun_high_n{neon_type[1].noq}"
     doc: "Signed saturating rounded shift right unsigned narrow"
@@ -8152,7 +8137,7 @@ intrinsics:
               - "vqshl{neon_type[2].noq}"
               - - FnCall: ["vdup_n{neon_type[2].no}", [a]]
                 - FnCall: ["vdup_n{neon_type[2].no}", [b]]
-      - FnCall: [simd_extract!, [c, '0']]
+      - FnCall: ['vget{neon_type[2].lane_nox}', [c], ['0']]
 
   - name: "vqshl{type[0]}"
     doc: "Signed saturating shift left"
@@ -8172,11 +8157,11 @@ intrinsics:
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vqshl_n_{type[3]}::<N>"
                 - - FnCall: ["vdup_n_{type[3]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vqshld_{type[0]}"
     doc: "Unsigned saturating shift left"
@@ -8215,7 +8200,7 @@ intrinsics:
               - "vqshl{neon_type[3].noq}"
               - - FnCall: ["vdup{neon_type[3].N}", [a]]
                 - FnCall: ["vdup{neon_type[4].N}", [b]]
-      - FnCall: [simd_extract!, [c, '0']]
+      - FnCall: ['vget{neon_type[3].lane_nox}', [c], ['0']]
 
   - name: "vqshl{type[0]}"
     doc: "Unsigned saturating shift left"
@@ -8235,9 +8220,9 @@ intrinsics:
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall: ["vqshl_n_{type[1]}::<N>", [{FnCall: ["vdup_n_{type[1]}", [a]]}]]
-            - '0'
+          - - '0'
 
   - name: "vqshrnd_n_s64"
     doc: "Signed saturating shift right narrow"
@@ -8279,11 +8264,11 @@ intrinsics:
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqshrn_n_{type[4]}::<N>"
                 - - FnCall: ["vdupq_n_{type[4]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vqshrn{type[0]}"
     doc: "Signed saturating shift right narrow"
@@ -8347,11 +8332,11 @@ intrinsics:
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - "simd_extract!"
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqshrn_n_{type[1]}::<N>"
                 - - FnCall: ["vdupq_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vqshrn{type[0]}"
     doc: "Unsigned saturating shift right narrow"
@@ -8392,11 +8377,11 @@ intrinsics:
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqshrun_n_{type[4]}::<N>"
                 - - FnCall: ["vdupq_n_{type[4]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vqshrun_high_n_{neon_type[1]}"
     doc: "Signed saturating shift right unsigned narrow"
@@ -8433,12 +8418,12 @@ intrinsics:
       - [h_u16, "u16", "i16", s16]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vsqadd_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[2]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vsqadd{type[0]}"
     doc: "Unsigned saturating accumulate of signed value"
@@ -9026,14 +9011,14 @@ intrinsics:
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [float64x2_t, float64x1_t, '0', '1', 'transmute::<f64, _>(simd_extract!(b, LANE2 as u32))']
-      - [poly64x2_t, poly64x1_t, '0', '1', 'transmute::<u64, _>(simd_extract!(b, LANE2 as u32))']
-      - [uint64x2_t, uint64x1_t, '0', '1', 'transmute::<u64, _>(simd_extract!(b, LANE2 as u32))']
-      - [int64x2_t, int64x1_t, '0', '1', 'transmute::<i64, _>(simd_extract!(b, LANE2 as u32))']
+      - [float64x2_t, float64x1_t, '0', '1']
+      - [poly64x2_t, poly64x1_t, '0', '1']
+      - [uint64x2_t, uint64x1_t, '0', '1']
+      - [int64x2_t, int64x1_t, '0', '1']
     compose:
       - FnCall: [static_assert!, ['LANE1 == {type[2]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[3]}']]
-      - Identifier: ["{type[4]}", UnsafeSymbol]
+      - FnCall: [transmute, [{FnCall: ['vget{neon_type[0].lane_nox}', [b], [LANE2]]}]]
 
   - name: "vcopy{neon_type[0].laneq_nox}"
     doc: "Insert vector element from another vector element"
@@ -9566,7 +9551,7 @@ intrinsics:
           - transmute
           - - FnCall:
                 - _vrnd32x_f64
-                - - FnCall: [simd_extract!, [a, 0]]
+                - - FnCall: ['vget{neon_type.lane_nox}', [a], [0]]
 
   - name: "vrnd32z{neon_type.no}"
     doc: "Floating-point round to 32-bit integer toward zero"
@@ -9610,7 +9595,7 @@ intrinsics:
               arch: aarch64,arm64ec
       - FnCall:
           - transmute
-          - - FnCall: [_vrnd32z_f64, [{FnCall: [simd_extract!, [a, 0]]}]]
+          - - FnCall: [_vrnd32z_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]]
 
   - name: "vrnd64x{neon_type.no}"
     doc: "Floating-point round to 64-bit integer, using current rounding mode"
@@ -9654,7 +9639,7 @@ intrinsics:
               arch: aarch64,arm64ec
       - FnCall:
           - transmute
-          - - FnCall: [_vrnd64x_f64, [{FnCall: [simd_extract!, [a, 0]]}]]
+          - - FnCall: [_vrnd64x_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]]
 
   - name: "vrnd64z{neon_type.no}"
     doc: "Floating-point round to 64-bit integer toward zero"
@@ -9698,7 +9683,7 @@ intrinsics:
               arch: aarch64,arm64ec
       - FnCall:
           - transmute
-          - - FnCall: [_vrnd64z_f64, [{FnCall: [simd_extract!, [a, 0]]}]]
+          - - FnCall: [_vrnd64z_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]]
 
   - name: "vtrn1{neon_type[0].no}"
     doc: Transpose vectors
@@ -10323,7 +10308,7 @@ intrinsics:
           - "vfma{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
 
   - name: "vfma{type[3]}"
@@ -10349,7 +10334,7 @@ intrinsics:
           - "vfma{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
 
   # vfms lane f16
@@ -10376,7 +10361,7 @@ intrinsics:
           - "vfms{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
 
   - name: "vfms{type[1]}"
@@ -10413,7 +10398,7 @@ intrinsics:
           - "vfma{neon_type.no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type.N}", [{FnCall: ['vget{neon_type.lane_nox}', [c], [LANE]]}]]
 
   - name: "vfma_laneq_f64"
     doc: "Floating-point fused multiply-add to accumulator"
@@ -10433,7 +10418,7 @@ intrinsics:
           - "vfma{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
   - name: "vfmaq_lane_f64"
     doc: "Floating-point fused multiply-add to accumulator"
@@ -10453,7 +10438,7 @@ intrinsics:
           - "vfma{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
   - name: "vfma{type[2]}"
     doc: "Floating-point fused multiply-add to accumulator"
@@ -10471,7 +10456,7 @@ intrinsics:
       - ["f64", float64x2_t, "d_laneq_f64", '1']
     compose:
       - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
-      - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}]
+      - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]
       - FnCall: ["fma{type[0]}", [b, c, a]]
 
   - name: "vfmad_lane_f64"
@@ -10488,7 +10473,7 @@ intrinsics:
       - ["f64", float64x1_t]
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}]
+      - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]
       - FnCall: [fmaf64, [b, c, a]]
 
 
@@ -10525,7 +10510,7 @@ intrinsics:
       - ["f16", float16x8_t, 'q_f16', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}]
+      - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [v], [LANE]]}]
       - FnCall: ["vfmah_{type[0]}", [a, b, c]]
 
   - name: "vfmsh_lane{type[2]}"
@@ -10545,7 +10530,7 @@ intrinsics:
       - ["f16", float16x8_t, 'q_f16', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}]
+      - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [v], [LANE]]}]
       - FnCall: ["vfmsh_{type[0]}", [a, b, c]]
 
   - name: "vfms_f64"
@@ -10608,7 +10593,7 @@ intrinsics:
       - [float64x2_t, float64x2_t, '1', q_laneq_f64]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[2]}']]
-      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
+      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]]
 
   - name: "vfms_lane_f64"
     doc: "Floating-point fused multiply-subtract to accumulator"
@@ -10624,7 +10609,7 @@ intrinsics:
       - float64x1_t
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - FnCall: ["vfms{neon_type.no}", [a, b, {FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
+      - FnCall: ["vfms{neon_type.no}", [a, b, {FnCall: ["vdup{neon_type.N}", [{FnCall: ['vget{neon_type.lane_nox}', [c], [LANE]]}]]}]]
 
   - name: "vfms_laneq_f64"
     doc: "Floating-point fused multiply-subtract to accumulator"
@@ -10640,7 +10625,7 @@ intrinsics:
       - [float64x1_t, float64x2_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
-      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
+      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]]
 
   - name: "vfmsq_lane_f64"
     doc: "Floating-point fused multiply-subtract to accumulator"
@@ -10656,7 +10641,7 @@ intrinsics:
       - [float64x2_t, float64x1_t]
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
+      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]]
 
   - name: "vfms{type[2]}"
     doc: "Floating-point fused multiply-subtract to accumulator"
@@ -10724,11 +10709,11 @@ intrinsics:
       - ["f64", "u64", "d_f64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vceqz_{type[0]}"
                 - - FnCall: ["vdup_n_{type[0]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vceqz{type[2]}"
     doc: "Floating-point compare bitwise equal to zero"
@@ -10744,11 +10729,11 @@ intrinsics:
       - ["f16", "u16", "h_f16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vceqz_{type[0]}"
                 - - FnCall: ["vdup_n_{type[0]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vceqzd_{type[2]}"
     doc: "Compare bitwise equal to zero"
@@ -10847,12 +10832,12 @@ intrinsics:
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcge_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
 
   - name: "vcge{type[0]}"
@@ -10869,12 +10854,12 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcge_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vcge{neon_type[0].no}"
     doc: "Floating-point compare greater than or equal"
@@ -10991,11 +10976,11 @@ intrinsics:
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgez_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
 
   - name: "vcgez{type[0]}"
@@ -11012,11 +10997,11 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgez_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vclezd_s64"
     doc: "Compare less than or equal to zero"
@@ -11123,11 +11108,11 @@ intrinsics:
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - "simd_extract!"
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgtz_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcgtz{type[0]}"
     doc: "Floating-point compare greater than zero"
@@ -11143,11 +11128,11 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - "simd_extract!"
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgtz_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcvt{neon_type[1].no}_{neon_type[0]}"
     doc: "Floating-point convert to unsigned fixed-point, rounding toward zero"
@@ -11203,7 +11188,7 @@ intrinsics:
           - - a
             - FnCall:
                 - "transmute::<f64, _>"
-                - - FnCall: [simd_extract!, [b, 'LANE as u32']]
+                - - FnCall: ['vget{neon_type.lane_nox}', [b], [LANE]]
 
   - name: "vmulq_lane_f64"
     doc: "Floating-point multiply"
@@ -11238,7 +11223,7 @@ intrinsics:
       - ["f64", float64x1_t]
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}]
+      - Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]
       - Identifier: ['a * b', Symbol]
 
   - name: "vmul_laneq_f64"
@@ -11260,7 +11245,7 @@ intrinsics:
           - - a
             - FnCall:
                 - "transmute::<f64, _>"
-                - - FnCall: [simd_extract!, [b, 'LANE as u32']]
+                - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
 
   - name: "vmulq_laneq_f64"
     doc: "Floating-point multiply"
@@ -11338,7 +11323,7 @@ intrinsics:
       - ["f64", float64x2_t, "d_laneq_f64", '1']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}]
+      - Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]
       - Identifier: ['a * b', Symbol]
 
 
@@ -11359,7 +11344,7 @@ intrinsics:
       - ["f16", float16x8_t, "h_laneq_f16", '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}]
+      - Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]
       - Identifier: ['a * b', Symbol]
 
 
@@ -11481,12 +11466,12 @@ intrinsics:
       - ["f64", "u64", 'd_f64']
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vclt_{type[0]}"
                 - - FnCall: ["vdup_n_{type[0]}", [a]]
                   - FnCall: ["vdup_n_{type[0]}", [b]]
-            - '0'
+          - - '0'
 
 
   - name: "vclt{type[2]}"
@@ -11503,12 +11488,12 @@ intrinsics:
       - ["f16", "u16", 'h_f16']
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vclt_{type[0]}"
                 - - FnCall: ["vdup_n_{type[0]}", [a]]
                   - FnCall: ["vdup_n_{type[0]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vabdl_high{neon_type[0].noq}"
     doc: Unsigned Absolute difference Long
@@ -11748,7 +11733,7 @@ intrinsics:
           - - a
             - FnCall:
                 - "vdup{neon_type[0].N}"
-                - - FnCall: [simd_extract!, [b, 'LANE as u32']]
+                - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
 
   - name: "vqabs{type[2]}"
     doc: "Signed saturating absolute value"
@@ -11763,9 +11748,9 @@ intrinsics:
       - ["i16", "s16", 'h_s16']
     compose:
       - FnCall:
-          - "simd_extract!"
+          - 'vget_lane_{type[0]}'
           - - FnCall: ["vqabs_{type[1]}", [{FnCall: ["vdup_n_{type[1]}", [a]]}]]
-            - '0'
+          - - '0'
 
   - name: "vqabs{type[1]}"
     doc: "Signed saturating absolute value"
@@ -11957,11 +11942,11 @@ intrinsics:
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqshlu_n_{type[4]}::<N>"
                 - - FnCall: ["vdup_n_{type[4]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcvta{neon_type[1].no}_{neon_type[0]}"
     doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to away"
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 68c92aa54dc6a..08a80c3719876 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -506,11 +506,11 @@ intrinsics:
       - ['h_f16', 'f16']
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vabs_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - 0
+          - - 0
 
   - name: "vcgt{neon_type[0].no}"
     doc: "Compare signed greater than"
@@ -1716,8 +1716,8 @@ intrinsics:
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
       - FnCall:
-          - "transmute{type[3]}"
-          - - FnCall: [simd_extract!, [a, 'N as u32']]
+          - "transmute"
+          - - FnCall: ['vget{neon_type[1].lane_nox}', [a], [N]]
 
   - name: "vext{neon_type[0].no}"
     doc: "Extract vector from pair of vectors"
@@ -11461,7 +11461,7 @@ intrinsics:
           - - a
             - FnCall:
                 - "vdup{neon_type[0].N}"
-                - - FnCall: [simd_extract!, [b, 'LANE as u32']]
+                - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
 
   - name: "vrecpe{neon_type.no}"
     doc: "Unsigned reciprocal estimate"

From 7050d6998275df25a8d21dc7aeb8a44866a98b4a Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Mon, 20 Apr 2026 10:00:45 +0100
Subject: [PATCH 20/78] gen-arm: use LLVM intrinsics for `f16` to `{i,u}16`

Instead of doing an `as {i,u}16` cast, we can use the same LLVM
intrinsics as when converting to `{i,u}{32,64}`, which is what Clang does
and ensures the intrinsic result matches.
---
 .../core_arch/src/aarch64/neon/generated.rs   |  74 ++++++++--
 .../spec/neon/aarch64.spec.yml                | 132 ++----------------
 2 files changed, 73 insertions(+), 133 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 5a0bbfa09552b..f705954194227 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -8007,7 +8007,14 @@ pub fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_s16_f16(a: f16) -> i16 {
-    vcvtah_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.i16.f16"
+        )]
+        fn _vcvtah_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtah_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s32_f16)"]
@@ -8051,7 +8058,14 @@ pub fn vcvtah_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_u16_f16(a: f16) -> u16 {
-    vcvtah_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.i16.f16"
+        )]
+        fn _vcvtah_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtah_u16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u32_f16)"]
@@ -8693,7 +8707,14 @@ pub fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_s16_f16(a: f16) -> i16 {
-    vcvtmh_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.i16.f16"
+        )]
+        fn _vcvtmh_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtmh_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding towards minus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s32_f16)"]
@@ -8729,7 +8750,7 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 {
     }
     unsafe { _vcvtmh_s64_f16(a) }
 }
-#[doc = "Floating-point convert to integer, rounding towards minus infinity"]
+#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u16_f16)"]
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtmu))]
@@ -8737,7 +8758,14 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_u16_f16(a: f16) -> u16 {
-    vcvtmh_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.i16.f16"
+        )]
+        fn _vcvtmh_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtmh_u16_f16(a) }
 }
 #[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u32_f16)"]
@@ -9041,7 +9069,14 @@ pub fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_s16_f16(a: f16) -> i16 {
-    vcvtnh_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtns.i16.f16"
+        )]
+        fn _vcvtnh_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtnh_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to nearest with ties to even"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s32_f16)"]
@@ -9085,7 +9120,14 @@ pub fn vcvtnh_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_u16_f16(a: f16) -> u16 {
-    vcvtnh_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtnu.i16.f16"
+        )]
+        fn _vcvtnh_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtnh_u16_f16(a) }
 }
 #[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u32_f16)"]
@@ -9389,7 +9431,14 @@ pub fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_s16_f16(a: f16) -> i16 {
-    vcvtph_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtps.i16.f16"
+        )]
+        fn _vcvtph_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtph_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to plus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s32_f16)"]
@@ -9433,7 +9482,14 @@ pub fn vcvtph_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_u16_f16(a: f16) -> u16 {
-    vcvtph_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtpu.i16.f16"
+        )]
+        fn _vcvtph_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtph_u16_f16(a) }
 }
 #[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u32_f16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 6950f69731c35..342e66cab4da7 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -1869,9 +1869,9 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "u16", 'h_u16_f16']
       - ["f16", "u32", 'h_u32_f16']
       - ["f16", "u64", 'h_u64_f16']
-
     compose:
       - LLVMLink:
           name: "vcvta{type[2]}"
@@ -1891,6 +1891,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "i16", 'h_s16_f16']
       - ["f16", "i32", 'h_s32_f16']
       - ["f16", "i64", 'h_s64_f16']
     compose:
@@ -1901,37 +1902,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-
-  - name: "vcvta{type[2]}"
-    doc: "Floating-point convert to integer, rounding to nearest with ties to away"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h_s16_f16', 's32']
-    compose:
-      - 'vcvtah_{type[3]}_f16(a) as i16'
-
-  - name: "vcvta{type[2]}"
-    doc: "Floating-point convert to integer, rounding to nearest with ties to away"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h_u16_f16', 'u32']
-    compose:
-      - 'vcvtah_{type[3]}_f16(a) as u16'
-
   - name: "vcvta{type[2]}"
     doc: "Floating-point convert to integer, rounding to nearest with ties to away"
     arguments: ["a: {type[0]}"]
@@ -2041,6 +2011,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "i16", 'h']
       - ["f16", "i32", 'h']
       - ["f16", "i64", 'h']
     compose:
@@ -2051,22 +2022,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding to nearest with ties to even"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h', 'i32']
-    compose:
-      - 'vcvtnh_{type[3]}_f16(a) as i16'
-
-
   - name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
     doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even"
     arguments: ["a: {type[0]}"]
@@ -2078,6 +2033,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "u16", 'h']
       - ["f16", "u32", 'h']
       - ["f16", "u64", 'h']
     compose:
@@ -2088,21 +2044,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h', 'u32']
-    compose:
-      - 'vcvtnh_{type[3]}_f16(a) as u16'
-
   - name: "vcvtm{neon_type[1].no}_{neon_type[0]}"
     doc: "Floating-point convert to signed integer, rounding toward minus infinity"
     arguments: ["a: {neon_type[0]}"]
@@ -2393,6 +2334,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "i16", 'h']
       - ["f16", "i32", 'h']
       - ["f16", "i64", 'h']
     compose:
@@ -2403,21 +2345,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding to plus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h', 'i32']
-    compose:
-      - 'vcvtph_{type[3]}_f16(a) as i16'
-
   - name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
     doc: "Floating-point convert to unsigned integer, rounding to plus infinity"
     arguments: ["a: {type[0]}"]
@@ -2429,6 +2356,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "u16", 'h']
       - ["f16", "u32", 'h']
       - ["f16", "u64", 'h']
     compose:
@@ -2439,21 +2367,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to unsigned integer, rounding to plus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h', 'u32']
-    compose:
-      - 'vcvtph_{type[3]}_f16(a) as u16'
-
   - name: "vdup{neon_type.laneq_nox}"
     doc: "Set all vector lanes to the same value"
     arguments: ["a: {neon_type}"]
@@ -12034,6 +11947,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "i16", 'h']
       - ["f16", "i32", 'h']
       - ["f16", "i64", 'h']
     compose:
@@ -12044,22 +11958,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding towards minus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h', 'i32']
-    compose:
-      - 'vcvtmh_{type[3]}_f16(a) as i16'
-
-
   - name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
     doc: "Floating-point convert to unsigned integer, rounding towards minus infinity"
     arguments: ["a: {type[0]}"]
@@ -12071,6 +11969,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "u16", 'h']
       - ["f16", "u32", 'h']
       - ["f16", "u64", 'h']
     compose:
@@ -12081,21 +11980,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding towards minus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h', 'u32']
-    compose:
-      - 'vcvtmh_{type[3]}_f16(a) as u16'
-
   - name: "vmlal_high_n_{neon_type[1]}"
     doc: "Multiply-add long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]

From 7fb11ae763ccbb8857460ef54a972c681c3026ce Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Thu, 30 Apr 2026 06:12:54 +0530
Subject: [PATCH 21/78] Use latest clang versions from kernel.org

---
 .../ci/docker/aarch64-unknown-linux-gnu/Dockerfile     | 10 ++++++++--
 .../ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile  |  9 +++++++--
 .../ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile | 10 ++++++++--
 .../ci/docker/x86_64-unknown-linux-gnu/Dockerfile      |  7 +++++++
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index 2768c521ebccc..8435dd3dedd33 100644
--- a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   qemu-user \
   make \
   file \
-  clang \
-  lld
+  xz-utils \
+  wget
+
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
 
 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
     CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \
diff --git a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
index f85c6a2592e99..0e8efc64bbff7 100644
--- a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
@@ -9,10 +9,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   qemu-user \
   make \
   file \
-  clang \
   curl \
   xz-utils \
-  lld
+  wget
 
 ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu"
 
@@ -21,6 +20,12 @@ RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/bin
 RUN tar -xvf "${TOOLCHAIN}.tar.xz"
 RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains
 
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
+
 ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}"
 ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"
 
diff --git a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
index 602249c0ece5a..c0a4ed3e706d1 100644
--- a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
+++ b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   qemu-user \
   make \
   file \
-  clang \
-  lld
+  wget
+
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
+
 ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
     CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
     OBJDUMP=arm-linux-gnueabihf-objdump
diff --git a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
index 17c6d25215aeb..ca6192a38d951 100644
--- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@@ -15,6 +15,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz
 RUN mkdir intel-sde
 RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
+
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
+
 ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
             -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \
             -rtm-mode full -tsx --"

From 229d480cdcd19962d8fb704fd250ae38ec9fc4ed Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 03:48:26 +0530
Subject: [PATCH 22/78] Change implementation of `vext`

---
 .../core_arch/src/aarch64/neon/generated.rs   |  16 +-
 .../crates/core_arch/src/aarch64/neon/mod.rs  |   1 -
 .../src/arm_shared/neon/generated.rs          | 588 ++++++------------
 .../core_arch/src/arm_shared/neon/mod.rs      |   2 +-
 .../spec/neon/aarch64.spec.yml                |  12 +-
 .../spec/neon/arm_shared.spec.yml             | 100 +--
 6 files changed, 235 insertions(+), 484 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 2624e2f22d002..68320e916d0d5 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -10344,13 +10344,7 @@ pub fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vextq_f64<const N: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"]
@@ -10361,13 +10355,7 @@ pub fn vextq_f64<const N: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vextq_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f64)"]
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index b140131012bbf..7ce79671f7e4e 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -12,7 +12,6 @@ pub use self::generated::*;
 
 use crate::{
     core_arch::{arm_shared::*, simd::*},
-    hint::unreachable_unchecked,
     intrinsics::{simd::*, *},
     mem::transmute,
 };
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 1ae7f3f3b0f2b..8f9595208465c 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -11956,15 +11956,7 @@ pub fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vext_f16<const N: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"]
@@ -11987,13 +11979,7 @@ pub fn vext_f16<const N: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 pub fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"]
@@ -12016,13 +12002,7 @@ pub fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 )]
 pub fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"]
@@ -12045,13 +12025,7 @@ pub fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 )]
 pub fn vext_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s64)"]
@@ -12125,17 +12099,20 @@ pub unsafe fn vext_u64<const N: i32>(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_
 pub fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12160,17 +12137,20 @@ pub fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 pub fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12195,17 +12175,20 @@ pub fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 pub fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12230,17 +12213,20 @@ pub fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 pub fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12265,17 +12251,20 @@ pub fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 pub fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12300,17 +12289,20 @@ pub fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 pub fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12336,17 +12328,20 @@ pub fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 pub fn vextq_f16<const N: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12370,15 +12365,7 @@ pub fn vextq_f16<const N: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 )]
 pub fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"]
@@ -12401,15 +12388,7 @@ pub fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 )]
 pub fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"]
@@ -12432,15 +12411,7 @@ pub fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 )]
 pub fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"]
@@ -12463,15 +12434,7 @@ pub fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 )]
 pub fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"]
@@ -12494,15 +12457,7 @@ pub fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 )]
 pub fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"]
@@ -12525,15 +12480,7 @@ pub fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 pub fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"]
@@ -12556,13 +12503,7 @@ pub fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 )]
 pub fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"]
@@ -12585,13 +12526,7 @@ pub fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 )]
 pub fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"]
@@ -12615,85 +12550,28 @@ pub fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 pub fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     static_assert_uimm_bits!(N, 4);
     unsafe {
-        match N & 0b1111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
-            ),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12718,85 +12596,28 @@ pub fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 pub fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     static_assert_uimm_bits!(N, 4);
     unsafe {
-        match N & 0b1111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
-            ),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12821,85 +12642,28 @@ pub fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 pub fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     static_assert_uimm_bits!(N, 4);
     unsafe {
-        match N & 0b1111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
-            ),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        )
     }
 }
 #[doc = "Floating-point fused Multiply-Add to accumulator (vector)"]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
index c4249c2b3c768..12d3c090ed9fa 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
@@ -7,7 +7,7 @@ mod generated;
 #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
 pub use self::generated::*;
 
-use crate::{core_arch::simd::*, hint::unreachable_unchecked, intrinsics::simd::*, mem::transmute};
+use crate::{core_arch::simd::*, intrinsics::simd::*, mem::transmute};
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 75e7e88362903..76a6b57410e34 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -2662,10 +2662,10 @@ intrinsics:
       - FnCall: [static_assert_uimm_bits!, [N, 2]]
       - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ext, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
@@ -2673,11 +2673,11 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [poly64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
-      - [float64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - poly64x2_t
+      - float64x2_t
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
-      - Identifier: ["{type[1]}", UnsafeSymbol]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']]
 
   - name: "vmla{neon_type.no}"
     doc: "Floating-point multiply-add to accumulator"
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 08a80c3719876..3ddd8db3b6d4d 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -1719,10 +1719,10 @@ intrinsics:
           - "transmute"
           - - FnCall: ['vget{neon_type[1].lane_nox}', [a], [N]]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]]
@@ -1733,20 +1733,20 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
-      - [int16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
-      - [uint8x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
-      - [uint16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
-      - [poly8x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
-      - [poly16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - int8x8_t
+      - int16x8_t
+      - uint8x8_t
+      - uint16x8_t
+      - poly8x8_t
+      - poly16x8_t
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 3]]
-      - Identifier: ["{type[1]}", UnsafeSymbol]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7]']]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 15']]}]]
@@ -1757,17 +1757,17 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x16_t, 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
-      - [uint8x16_t, 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
-      - [poly8x16_t, 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
+      - int8x16_t
+      - uint8x16_t
+      - poly8x16_t
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 4]]
-      - Identifier: ["{type[1]}", UnsafeSymbol]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7, N as u32 + 8, N as u32 + 9, N as u32 + 10, N as u32 + 11, N as u32 + 12, N as u32 + 13, N as u32 + 14, N as u32 + 15]']]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]]
@@ -1778,21 +1778,21 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int16x4_t,'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
-      - [int32x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
-      - [uint16x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
-      - [uint32x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
-      - [poly16x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
-      - [float32x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - int16x4_t
+      - int32x4_t
+      - uint16x4_t
+      - uint32x4_t
+      - poly16x4_t
+      - float32x4_t
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 2]]
-      - Identifier: ["{type[1]}", UnsafeSymbol]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]']]
 
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]]
@@ -1805,15 +1805,15 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [float16x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - float16x4_t
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 2]]
-      - Identifier: ["{type[1]}", UnsafeSymbol]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]']]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]]
@@ -1826,17 +1826,17 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [float16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - float16x8_t
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 3]]
-      - Identifier: ["{type[1]}", UnsafeSymbol]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7]']]
 
 
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 1']]}]]
@@ -1847,17 +1847,17 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int32x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
-      - [uint32x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
-      - [float32x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - int32x2_t
+      - uint32x2_t
+      - float32x2_t
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
-      - Identifier: ["{type[1]}", UnsafeSymbol]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmov, 'N = 1']]}]]
@@ -1868,11 +1868,11 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
-      - [uint64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - int64x2_t
+      - uint64x2_t
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
-      - Identifier: ["{type[1]}", UnsafeSymbol]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']]
 
   - name: "vmla{neon_type[0].no}"
     doc: "Multiply-add to accumulator"

From f603c7fb4bf099e77b2fbd36daf39c7ba4628953 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 03:50:54 +0530
Subject: [PATCH 23/78] Change implementation of `vcopy{q}_lane{q}`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 2588 +----------------
 .../spec/neon/aarch64.spec.yml                |  106 +-
 2 files changed, 108 insertions(+), 2586 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 68320e916d0d5..46aede98bb08a 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -4131,13 +4131,7 @@ pub fn vcopy_lane_f32<const LANE1: i32, const LANE2: i32>(
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_f32::<LANE1>(vget_lane_f32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s8)"]
@@ -4152,19 +4146,7 @@ pub fn vcopy_lane_f32<const LANE1: i32, const LANE2: i32>(
 pub fn vcopy_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s8::<LANE1>(vget_lane_s8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s16)"]
@@ -4179,15 +4161,7 @@ pub fn vcopy_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x8_
 pub fn vcopy_lane_s16<const LANE1: i32, const LANE2: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s16::<LANE1>(vget_lane_s16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s32)"]
@@ -4202,13 +4176,7 @@ pub fn vcopy_lane_s16<const LANE1: i32, const LANE2: i32>(a: int16x4_t, b: int16
 pub fn vcopy_lane_s32<const LANE1: i32, const LANE2: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s32::<LANE1>(vget_lane_s32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u8)"]
@@ -4223,19 +4191,7 @@ pub fn vcopy_lane_s32<const LANE1: i32, const LANE2: i32>(a: int32x2_t, b: int32
 pub fn vcopy_lane_u8<const LANE1: i32, const LANE2: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u8::<LANE1>(vget_lane_u8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u16)"]
@@ -4253,15 +4209,7 @@ pub fn vcopy_lane_u16<const LANE1: i32, const LANE2: i32>(
 ) -> uint16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u16::<LANE1>(vget_lane_u16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u32)"]
@@ -4279,13 +4227,7 @@ pub fn vcopy_lane_u32<const LANE1: i32, const LANE2: i32>(
 ) -> uint32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u32::<LANE1>(vget_lane_u32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p8)"]
@@ -4300,19 +4242,7 @@ pub fn vcopy_lane_u32<const LANE1: i32, const LANE2: i32>(
 pub fn vcopy_lane_p8<const LANE1: i32, const LANE2: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_p8::<LANE1>(vget_lane_p8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p16)"]
@@ -4330,15 +4260,7 @@ pub fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(
 ) -> poly16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_p16::<LANE1>(vget_lane_p16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f64)"]
@@ -4416,14 +4338,7 @@ pub fn vcopy_laneq_f32<const LANE1: i32, const LANE2: i32>(
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_f32::<LANE1>(vgetq_lane_f32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s8)"]
@@ -4438,21 +4353,7 @@ pub fn vcopy_laneq_f32<const LANE1: i32, const LANE2: i32>(
 pub fn vcopy_laneq_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x16_t) -> int8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 4);
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s8::<LANE1>(vgetq_lane_s8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s16)"]
@@ -4470,16 +4371,7 @@ pub fn vcopy_laneq_s16<const LANE1: i32, const LANE2: i32>(
 ) -> int16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s16::<LANE1>(vgetq_lane_s16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s32)"]
@@ -4497,14 +4389,7 @@ pub fn vcopy_laneq_s32<const LANE1: i32, const LANE2: i32>(
 ) -> int32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s32::<LANE1>(vgetq_lane_s32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u8)"]
@@ -4522,21 +4407,7 @@ pub fn vcopy_laneq_u8<const LANE1: i32, const LANE2: i32>(
 ) -> uint8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 4);
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u8::<LANE1>(vgetq_lane_u8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u16)"]
@@ -4554,16 +4425,7 @@ pub fn vcopy_laneq_u16<const LANE1: i32, const LANE2: i32>(
 ) -> uint16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u16::<LANE1>(vgetq_lane_u16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u32)"]
@@ -4581,14 +4443,7 @@ pub fn vcopy_laneq_u32<const LANE1: i32, const LANE2: i32>(
 ) -> uint32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u32::<LANE1>(vgetq_lane_u32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p8)"]
@@ -4606,21 +4461,7 @@ pub fn vcopy_laneq_p8<const LANE1: i32, const LANE2: i32>(
 ) -> poly8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 4);
-    unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_p8::<LANE1>(vgetq_lane_p8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p16)"]
@@ -4638,16 +4479,7 @@ pub fn vcopy_laneq_p16<const LANE1: i32, const LANE2: i32>(
 ) -> poly16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_p16::<LANE1>(vgetq_lane_p16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f64)"]
@@ -4722,16 +4554,7 @@ pub fn vcopyq_lane_f32<const LANE1: i32, const LANE2: i32>(
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_f32::<LANE1>(vget_lane_f32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f64)"]
@@ -4749,14 +4572,8 @@ pub fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(
 ) -> float64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert!(LANE2 == 0);
-    unsafe {
-        let b: float64x2_t = simd_shuffle!(b, b, [0, 1]);
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: float64x2_t = vcombine_f64(b, b);
+    vsetq_lane_f64::<LANE1>(vgetq_lane_f64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s64)"]
@@ -4774,14 +4591,8 @@ pub fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(
 ) -> int64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert!(LANE2 == 0);
-    unsafe {
-        let b: int64x2_t = simd_shuffle!(b, b, [0, 1]);
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: int64x2_t = vcombine_s64(b, b);
+    vsetq_lane_s64::<LANE1>(vgetq_lane_s64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u64)"]
@@ -4799,14 +4610,8 @@ pub fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert!(LANE2 == 0);
-    unsafe {
-        let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]);
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: uint64x2_t = vcombine_u64(b, b);
+    vsetq_lane_u64::<LANE1>(vgetq_lane_u64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"]
@@ -4824,14 +4629,8 @@ pub fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
 ) -> poly64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert!(LANE2 == 0);
-    unsafe {
-        let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: poly64x2_t = vcombine_p64(b, b);
+    vsetq_lane_p64::<LANE1>(vgetq_lane_p64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8)"]
@@ -4846,365 +4645,8 @@ pub fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
 pub fn vcopyq_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x16_t, b: int8x8_t) -> int8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        let b: int8x16_t =
-            simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: int8x16_t = vcombine_s8(b, b);
+    vsetq_lane_s8::<LANE1>(vgetq_lane_s8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s16)"]
@@ -5222,20 +4664,8 @@ pub fn vcopyq_lane_s16<const LANE1: i32, const LANE2: i32>(
 ) -> int16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: int16x8_t = vcombine_s16(b, b);
+    vsetq_lane_s16::<LANE1>(vgetq_lane_s16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s32)"]
@@ -5253,16 +4683,8 @@ pub fn vcopyq_lane_s32<const LANE1: i32, const LANE2: i32>(
 ) -> int32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: int32x4_t = vcombine_s32(b, b);
+    vsetq_lane_s32::<LANE1>(vgetq_lane_s32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u8)"]
@@ -5280,365 +4702,8 @@ pub fn vcopyq_lane_u8<const LANE1: i32, const LANE2: i32>(
 ) -> uint8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        let b: uint8x16_t =
-            simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: uint8x16_t = vcombine_u8(b, b);
+    vsetq_lane_u8::<LANE1>(vgetq_lane_u8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u16)"]
@@ -5656,20 +4721,8 @@ pub fn vcopyq_lane_u16<const LANE1: i32, const LANE2: i32>(
 ) -> uint16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: uint16x8_t = vcombine_u16(b, b);
+    vsetq_lane_u16::<LANE1>(vgetq_lane_u16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u32)"]
@@ -5687,16 +4740,8 @@ pub fn vcopyq_lane_u32<const LANE1: i32, const LANE2: i32>(
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: uint32x4_t = vcombine_u32(b, b);
+    vsetq_lane_u32::<LANE1>(vgetq_lane_u32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p8)"]
@@ -5714,365 +4759,8 @@ pub fn vcopyq_lane_p8<const LANE1: i32, const LANE2: i32>(
 ) -> poly8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        let b: poly8x16_t =
-            simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: poly8x16_t = vcombine_p8(b, b);
+    vsetq_lane_p8::<LANE1>(vgetq_lane_p8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p16)"]
@@ -6090,20 +4778,8 @@ pub fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(
 ) -> poly16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: poly16x8_t = vcombine_p16(b, b);
+    vsetq_lane_p16::<LANE1>(vgetq_lane_p16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f32)"]
@@ -6121,15 +4797,7 @@ pub fn vcopyq_laneq_f32<const LANE1: i32, const LANE2: i32>(
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_f32::<LANE1>(vgetq_lane_f32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f64)"]
@@ -6147,13 +4815,7 @@ pub fn vcopyq_laneq_f64<const LANE1: i32, const LANE2: i32>(
 ) -> float64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_f64::<LANE1>(vgetq_lane_f64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s8)"]
@@ -6171,363 +4833,7 @@ pub fn vcopyq_laneq_s8<const LANE1: i32, const LANE2: i32>(
 ) -> int8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 4);
-    unsafe {
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_s8::<LANE1>(vgetq_lane_s8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s16)"]
@@ -6545,19 +4851,7 @@ pub fn vcopyq_laneq_s16<const LANE1: i32, const LANE2: i32>(
 ) -> int16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_s16::<LANE1>(vgetq_lane_s16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s32)"]
@@ -6575,15 +4869,7 @@ pub fn vcopyq_laneq_s32<const LANE1: i32, const LANE2: i32>(
 ) -> int32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_s32::<LANE1>(vgetq_lane_s32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s64)"]
@@ -6601,13 +4887,7 @@ pub fn vcopyq_laneq_s64<const LANE1: i32, const LANE2: i32>(
 ) -> int64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_s64::<LANE1>(vgetq_lane_s64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u8)"]
@@ -6625,363 +4905,7 @@ pub fn vcopyq_laneq_u8<const LANE1: i32, const LANE2: i32>(
 ) -> uint8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 4);
-    unsafe {
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_u8::<LANE1>(vgetq_lane_u8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u16)"]
@@ -6999,19 +4923,7 @@ pub fn vcopyq_laneq_u16<const LANE1: i32, const LANE2: i32>(
 ) -> uint16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_u16::<LANE1>(vgetq_lane_u16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u32)"]
@@ -7029,15 +4941,7 @@ pub fn vcopyq_laneq_u32<const LANE1: i32, const LANE2: i32>(
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_u32::<LANE1>(vgetq_lane_u32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u64)"]
@@ -7055,13 +4959,7 @@ pub fn vcopyq_laneq_u64<const LANE1: i32, const LANE2: i32>(
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_u64::<LANE1>(vgetq_lane_u64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p8)"]
@@ -7079,363 +4977,7 @@ pub fn vcopyq_laneq_p8<const LANE1: i32, const LANE2: i32>(
 ) -> poly8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 4);
-    unsafe {
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_p8::<LANE1>(vgetq_lane_p8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p16)"]
@@ -7453,19 +4995,7 @@ pub fn vcopyq_laneq_p16<const LANE1: i32, const LANE2: i32>(
 ) -> poly16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_p16::<LANE1>(vgetq_lane_p16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"]
@@ -7483,13 +5013,7 @@ pub fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(
 ) -> poly64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_p64::<LANE1>(vgetq_lane_p64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f64)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 76a6b57410e34..e88ebc2b7ff03 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -8941,19 +8941,19 @@ intrinsics:
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [int8x8_t, int8x8_t, int8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int16x4_t, int16x4_t, int16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int32x2_t, int32x2_t, int32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint8x8_t, uint8x8_t, uint8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint16x4_t, uint16x4_t, uint16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint32x2_t, uint32x2_t, uint32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [float32x2_t, float32x2_t, float32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int8x8_t, int8x8_t, int8x8_t, '3', '3']
+      - [int16x4_t, int16x4_t, int16x4_t, '2', '2']
+      - [int32x2_t, int32x2_t, int32x2_t, '1', '1']
+      - [uint8x8_t, uint8x8_t, uint8x8_t, '3', '3']
+      - [uint16x4_t, uint16x4_t, uint16x4_t, '2', '2']
+      - [uint32x2_t, uint32x2_t, uint32x2_t, '1', '1']
+      - [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3']
+      - [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2']
+      - [float32x2_t, float32x2_t, float32x2_t, '1', '1']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
-      - Identifier: ["{type[5]}", UnsafeSymbol]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
   - name: "vcopy{neon_type[0].lane_nox}"
     doc: "Insert vector element from another vector element"
@@ -8966,19 +8966,19 @@ intrinsics:
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [int8x16_t, int8x8_t, int8x16_t, '4', '3', ' let b: int8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int16x8_t, int16x4_t, int16x8_t, '3', '2', ' let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int32x4_t, int32x2_t, int32x4_t, '2', '1', ' let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3', ' let b: uint8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2', ' let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1', ' let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3', ' let b: poly8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2', ' let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int8x16_t, int8x8_t, int8x16_t, '4', '3']
+      - [int16x8_t, int16x4_t, int16x8_t, '3', '2']
+      - [int32x4_t, int32x2_t, int32x4_t, '2', '1']
+      - [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3']
+      - [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2']
+      - [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1']
+      - [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3']
+      - [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
-      - Identifier: ["{type[5]}", UnsafeSymbol]
-      - Identifier: ["{type[6]}", UnsafeSymbol]
+      - Let: [b, '{neon_type[2]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
   - name: "vcopy_lane_{neon_type[0]}"
     doc: "Insert vector element from another vector element"
@@ -9031,23 +9031,23 @@ intrinsics:
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [int8x16_t, int8x16_t, int8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int16x8_t, int16x8_t, int16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int32x4_t, int32x4_t, int32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int64x2_t, int64x2_t, int64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint8x16_t, uint8x16_t, uint8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint16x8_t, uint16x8_t, uint16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint32x4_t, uint32x4_t, uint32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [float32x4_t, float32x4_t, float32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [float64x2_t, float64x2_t, float64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int8x16_t, int8x16_t, int8x16_t, '4', '4']
+      - [int16x8_t, int16x8_t, int16x8_t, '3', '3']
+      - [int32x4_t, int32x4_t, int32x4_t, '2', '2']
+      - [int64x2_t, int64x2_t, int64x2_t, '1', '1']
+      - [uint8x16_t, uint8x16_t, uint8x16_t, '4', '4']
+      - [uint16x8_t, uint16x8_t, uint16x8_t, '3', '3']
+      - [uint32x4_t, uint32x4_t, uint32x4_t, '2', '2']
+      - [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1']
+      - [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4']
+      - [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3']
+      - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1']
+      - [float32x4_t, float32x4_t, float32x4_t, '2', '2']
+      - [float64x2_t, float64x2_t, float64x2_t, '1', '1']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
-      - Identifier: ["{type[5]}", UnsafeSymbol]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
   - name: "vcopy{neon_type[0].laneq_nox}"
     doc: "Insert vector element from another vector element"
@@ -9060,20 +9060,19 @@ intrinsics:
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [int8x8_t, int8x16_t, int8x8_t, '3', '4', ' let a: int8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int16x4_t, int16x8_t, int16x4_t, '2', '3', ' let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int32x2_t, int32x4_t, int32x2_t, '1', '2', ' let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4', ' let a: uint8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3', ' let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2', 'let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4', ' let a: poly8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3', ' let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [float32x2_t, float32x4_t, float32x2_t, '1', '2', ' let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int8x8_t, int8x16_t, int8x8_t, '3', '4']
+      - [int16x4_t, int16x8_t, int16x4_t, '2', '3']
+      - [int32x2_t, int32x4_t, int32x2_t, '1', '2']
+      - [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4']
+      - [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3']
+      - [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2']
+      - [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4']
+      - [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3']
+      - [float32x2_t, float32x4_t, float32x2_t, '1', '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
-      - Identifier: ["{type[5]}", UnsafeSymbol]
-      - Identifier: ["{type[6]}", UnsafeSymbol]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
   - name: "vcopyq_lane_{neon_type[0]}"
     doc: "Insert vector element from another vector element"
@@ -9086,15 +9085,15 @@ intrinsics:
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [int64x2_t, int64x1_t, ' let b: int64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint64x2_t, uint64x1_t, ' let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly64x2_t, poly64x1_t, ' let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [float64x2_t, float64x1_t, ' let b: float64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int64x2_t, int64x1_t]
+      - [uint64x2_t, uint64x1_t]
+      - [poly64x2_t, poly64x1_t]
+      - [float64x2_t, float64x1_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '1']]
       - FnCall: [static_assert!, ['LANE2 == 0']]
-      - Identifier: ['{type[2]}', UnsafeSymbol]
-      - Identifier: ['{type[3]}', UnsafeSymbol]
+      - Let: [b, '{neon_type[0]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[0].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
   - name: "vcopyq_lane_f32"
     doc: "Insert vector element from another vector element"
@@ -9107,12 +9106,11 @@ intrinsics:
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [float32x4_t, float32x2_t, ' let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [float32x4_t, float32x2_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, 2]]
       - FnCall: [static_assert_uimm_bits!, [LANE2, 1]]
-      - Identifier: ["{type[2]}", UnsafeSymbol]
-      - Identifier: ["{type[3]}", UnsafeSymbol]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
   - name: "vcreate_f64"
     doc: "Insert vector element from another vector element"

From 8d56c77c04cb5a81812832fb1a9ef01a153054cf Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 04:04:28 +0530
Subject: [PATCH 24/78] Change implementation of `vab{a,d}l_high`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 114 ++++-----
 .../spec/neon/aarch64.spec.yml                | 222 ++----------------
 2 files changed, 71 insertions(+), 265 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 46aede98bb08a..7059b0e4fa20d 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -70,10 +70,10 @@ pub fn __jcvt(a: f64) -> i32 {
     assert_instr(sabal2)
 )]
 pub fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
+    let d = vget_high_s8(b);
+    let e = vget_high_s8(c);
+    let f = vabd_s8(d, e);
     unsafe {
-        let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let e: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let f: int8x8_t = vabd_s8(d, e);
         let f: uint8x8_t = simd_cast(f);
         simd_add(a, simd_cast(f))
     }
@@ -88,10 +88,10 @@ pub fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
     assert_instr(sabal2)
 )]
 pub fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    let d = vget_high_s16(b);
+    let e = vget_high_s16(c);
+    let f = vabd_s16(d, e);
     unsafe {
-        let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let e: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        let f: int16x4_t = vabd_s16(d, e);
         let f: uint16x4_t = simd_cast(f);
         simd_add(a, simd_cast(f))
     }
@@ -106,10 +106,10 @@ pub fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
     assert_instr(sabal2)
 )]
 pub fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    let d = vget_high_s32(b);
+    let e = vget_high_s32(c);
+    let f = vabd_s32(d, e);
     unsafe {
-        let d: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let e: int32x2_t = simd_shuffle!(c, c, [2, 3]);
-        let f: int32x2_t = vabd_s32(d, e);
         let f: uint32x2_t = simd_cast(f);
         simd_add(a, simd_cast(f))
     }
@@ -124,12 +124,10 @@ pub fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
     assert_instr(uabal2)
 )]
 pub fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let e: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let f: uint8x8_t = vabd_u8(d, e);
-        simd_add(a, simd_cast(f))
-    }
+    let d = vget_high_u8(b);
+    let e = vget_high_u8(c);
+    let f = vabd_u8(d, e);
+    unsafe { simd_add(a, simd_cast(f)) }
 }
 #[doc = "Unsigned Absolute difference and Accumulate Long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u16)"]
@@ -141,12 +139,10 @@ pub fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t
     assert_instr(uabal2)
 )]
 pub fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let d: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let e: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        let f: uint16x4_t = vabd_u16(d, e);
-        simd_add(a, simd_cast(f))
-    }
+    let d = vget_high_u16(b);
+    let e = vget_high_u16(c);
+    let f = vabd_u16(d, e);
+    unsafe { simd_add(a, simd_cast(f)) }
 }
 #[doc = "Unsigned Absolute difference and Accumulate Long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u32)"]
@@ -158,12 +154,10 @@ pub fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t
     assert_instr(uabal2)
 )]
 pub fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let d: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let e: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
-        let f: uint32x2_t = vabd_u32(d, e);
-        simd_add(a, simd_cast(f))
-    }
+    let d = vget_high_u32(b);
+    let e = vget_high_u32(c);
+    let f = vabd_u32(d, e);
+    unsafe { simd_add(a, simd_cast(f)) }
 }
 #[doc = "Absolute difference between the arguments of Floating"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f64)"]
@@ -226,85 +220,79 @@ pub fn vabdh_f16(a: f16, b: f16) -> f16 {
     vget_lane_f16::<0>(vabd_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Signed Absolute difference Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))]
-pub fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+    let c = vget_high_s8(a);
+    let d = vget_high_s8(b);
     unsafe {
-        let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let e: uint16x4_t = simd_cast(vabd_s16(c, d));
+        let e: uint8x8_t = simd_cast(vabd_s8(c, d));
         simd_cast(e)
     }
 }
 #[doc = "Signed Absolute difference Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))]
-pub fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+pub fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    let c = vget_high_s16(a);
+    let d = vget_high_s16(b);
     unsafe {
-        let c: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let d: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let e: uint32x2_t = simd_cast(vabd_s32(c, d));
+        let e: uint16x4_t = simd_cast(vabd_s16(c, d));
         simd_cast(e)
     }
 }
 #[doc = "Signed Absolute difference Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))]
-pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+pub fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    let c = vget_high_s32(a);
+    let d = vget_high_s32(b);
     unsafe {
-        let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let e: uint8x8_t = simd_cast(vabd_s8(c, d));
+        let e: uint32x2_t = simd_cast(vabd_s32(c, d));
         simd_cast(e)
     }
 }
 #[doc = "Unsigned Absolute difference Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
-pub fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let d: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        simd_cast(vabd_u16(c, d))
-    }
+pub fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
+    let c = vget_high_u8(a);
+    let d = vget_high_u8(b);
+    unsafe { simd_cast(vabd_u8(c, d)) }
 }
 #[doc = "Unsigned Absolute difference Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
-pub fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let d: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        simd_cast(vabd_u32(c, d))
-    }
+pub fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+    let c = vget_high_u16(a);
+    let d = vget_high_u16(b);
+    unsafe { simd_cast(vabd_u16(c, d)) }
 }
 #[doc = "Unsigned Absolute difference Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
-pub fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        simd_cast(vabd_u8(c, d))
-    }
+pub fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+    let c = vget_high_u32(a);
+    let d = vget_high_u32(b);
+    unsafe { simd_cast(vabd_u32(c, d)) }
 }
 #[doc = "Floating-point absolute value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f64)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index e88ebc2b7ff03..26a542cdc3872 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -220,103 +220,11 @@ intrinsics:
     safety: safe
     types:
       - [int8x16_t, int16x8_t, int8x8_t, uint8x8_t]
-    compose:
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - [8, 9, 10, 11, 12, 13, 14, 15]
-      - Let:
-          - d
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - [8, 9, 10, 11, 12, 13, 14, 15]
-      - Let:
-          - e
-          - "{neon_type[3]}"
-          - FnCall:
-              - simd_cast
-              - - FnCall:
-                    - "vabd_{neon_type[0]}"
-                    - - c
-                      - d
-      - FnCall:
-          - simd_cast
-          - - e
-
-  - name: "vabdl_high{neon_type[0].noq}"
-    doc: Signed Absolute difference Long
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[1]}"
-    attr:
-      - *neon-stable
-      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sabdl2]]}]]
-    safety: safe
-    types:
       - [int16x8_t, int32x4_t, int16x4_t, uint16x4_t]
-    compose:
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - [4, 5, 6, 7]
-      - Let:
-          - d
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - [4, 5, 6, 7]
-      - Let:
-          - e
-          - "{neon_type[3]}"
-          - FnCall:
-              - simd_cast
-              - - FnCall:
-                    - "vabd_{neon_type[0]}"
-                    - - c
-                      - d
-      - FnCall:
-          - simd_cast
-          - - e
-
-  - name: "vabdl_high{neon_type[0].noq}"
-    doc: Signed Absolute difference Long
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[1]}"
-    attr:
-      - *neon-stable
-      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sabdl2]]}]]
-    safety: safe
-    types:
       - [int32x4_t, int64x2_t, int32x2_t, uint32x2_t]
     compose:
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - [2, 3]
-      - Let:
-          - d
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - [2, 3]
+      - Let: [c, FnCall: ['vget_high_{neon_type[0]}', [a]]]
+      - Let: [d, FnCall: ['vget_high_{neon_type[0]}', [b]]]
       - Let:
           - e
           - "{neon_type[3]}"
@@ -10050,19 +9958,17 @@ intrinsics:
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uabal2]]}]]
     safety: safe
     types:
-      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]', '[2, 3]']
+      - [uint16x8_t, uint8x16_t, uint8x8_t]
+      - [uint32x4_t, uint16x8_t, uint16x4_t]
+      - [uint64x2_t, uint32x4_t, uint32x2_t]
     compose:
       - Let:
           - d
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
+          - FnCall: ['vget_high_{neon_type[1]}', [b]]
       - Let:
           - e
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
-      - Let: [f, "{neon_type[2]}", {FnCall: ["vabd_{neon_type[2]}", [d, e]]}]
+          - FnCall: ['vget_high_{neon_type[1]}', [c]]
+      - Let: [f, {FnCall: ["vabd_{neon_type[2]}", [d, e]]}]
       - FnCall:
           - simd_add
           - - a
@@ -10077,36 +9983,25 @@ intrinsics:
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [sabal2]]}]]
     safety: safe
     types:
-      - [int16x8_t, int8x16_t, int8x16_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t, uint8x8_t]
-      - [int32x4_t, int16x8_t, int16x8_t, '[4, 5, 6, 7]', int16x4_t, uint16x4_t]
-      - [int64x2_t, int32x4_t, int32x4_t, '[2, 3]', int32x2_t, uint32x2_t]
+      - [int16x8_t, int8x16_t, int8x16_t, int8x8_t, uint8x8_t]
+      - [int32x4_t, int16x8_t, int16x8_t, int16x4_t, uint16x4_t]
+      - [int64x2_t, int32x4_t, int32x4_t, int32x2_t, uint32x2_t]
     compose:
       - Let:
           - d
-          - "{neon_type[4]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - "{type[3]}"
+          - FnCall: ['vget_high_{neon_type[1]}', [b]]
       - Let:
           - e
-          - "{neon_type[4]}"
-          - FnCall:
-              - simd_shuffle!
-              - - c
-                - c
-                - "{type[3]}"
+          - FnCall: ['vget_high_{neon_type[2]}', [c]]
       - Let:
           - f
-          - "{neon_type[4]}"
           - FnCall:
-              - "vabd{neon_type[4].no}"
+              - "vabd{neon_type[3].no}"
               - - d
                 - e
       - Let:
           - f
-          - "{neon_type[5]}"
+          - "{neon_type[4]}"
           - FnCall:
               - simd_cast
               - - f
@@ -11502,93 +11397,16 @@ intrinsics:
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uabdl2]]}]]
     safety: safe
     types:
-      - [uint8x16_t, uint16x8_t, uint8x8_t]
+      - [uint8x16_t, uint16x8_t]
+      - [uint16x8_t, uint32x4_t]
+      - [uint32x4_t, uint64x2_t]
     compose:
       - Let:
           - c
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - [8, 9, 10, 11, 12, 13, 14, 15]
+          - FnCall: ['vget_high_{neon_type[0]}', [a]]
       - Let:
           - d
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - [8, 9, 10, 11, 12, 13, 14, 15]
-      - FnCall:
-          - simd_cast
-          - - FnCall:
-                - "vabd_{neon_type[0]}"
-                - - c
-                  - d
-
-  - name: "vabdl_high{neon_type[0].noq}"
-    doc: Unsigned Absolute difference Long
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[1]}"
-    attr:
-      - *neon-stable
-      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uabdl2]]}]]
-    safety: safe
-    types:
-      - [uint16x8_t, uint32x4_t, uint16x4_t]
-    compose:
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - [4, 5, 6, 7]
-      - Let:
-          - d
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - [4, 5, 6, 7]
-
-      - FnCall:
-          - simd_cast
-          - - FnCall:
-                - "vabd_{neon_type[0]}"
-                - - c
-                  - d
-
-  - name: "vabdl_high{neon_type[0].noq}"
-    doc: Unsigned Absolute difference Long
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[1]}"
-    attr:
-      - *neon-stable
-      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uabdl2]]}]]
-    safety: safe
-    types:
-      - [uint32x4_t, uint64x2_t, uint32x2_t, int32x2_t]
-    compose:
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - [2, 3]
-      - Let:
-          - d
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - [2, 3]
+          - FnCall: ['vget_high_{neon_type[0]}', [b]]
       - FnCall:
           - simd_cast
           - - FnCall:

From 4871c490cb5451f9253211ef97202d51344fcddc Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 04:14:58 +0530
Subject: [PATCH 25/78] Change implementation of `vcvt{x}_f32_f64`

---
 .../core_arch/src/aarch64/neon/generated.rs    |  9 +++------
 .../stdarch-gen-arm/spec/neon/aarch64.spec.yml | 18 ++++--------------
 2 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 7059b0e4fa20d..448c4aacdae02 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -5093,7 +5093,7 @@ pub fn vcvt_high_f32_f16(a: float16x8_t) -> float32x4_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, simd_cast(b), [0, 1, 2, 3]) }
+    vcombine_f32(a, vcvt_f32_f64(b))
 }
 #[doc = "Floating-point convert to higher precision long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f64_f32)"]
@@ -5102,10 +5102,7 @@ pub fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t {
-    unsafe {
-        let b: float32x2_t = simd_shuffle!(a, a, [2, 3]);
-        simd_cast(b)
-    }
+    unsafe { simd_cast(vget_high_f32(a)) }
 }
 #[doc = "Fixed-point convert to floating-point"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_s64)"]
@@ -7266,7 +7263,7 @@ pub fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtxn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, vcvtx_f32_f64(b), [0, 1, 2, 3]) }
+    vcombine_f32(a, vcvtx_f32_f64(b))
 }
 #[doc = "Floating-point convert to lower precision narrow, rounding to odd"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtxd_f32_f64)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 26a542cdc3872..ac30a651273a2 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -1417,15 +1417,7 @@ intrinsics:
     types:
       - [float32x4_t, float64x2_t]
     compose:
-      - Let:
-          - b
-          - float32x2_t
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - '[2, 3]'
-      - FnCall: [simd_cast, [b]]
+      - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]]
 
   - name: "vcvt_high_f16_f32"
     doc: "Floating-point convert to lower precision"
@@ -1486,10 +1478,9 @@ intrinsics:
       - [float32x2_t, float64x2_t, float32x4_t]
     compose:
       - FnCall:
-          - simd_shuffle!
+          - vcombine_f32
           - - a
-            - FnCall: [simd_cast, [b]]
-            - '[0, 1, 2, 3]'
+            - FnCall: [vcvt_f32_f64, [b]]
 
   - name: "vcvtx_f32_f64"
     doc: "Floating-point convert to lower precision narrow, rounding to odd"
@@ -1538,10 +1529,9 @@ intrinsics:
       - [float32x2_t, float64x2_t, float32x4_t]
     compose:
       - FnCall:
-          - simd_shuffle!
+          - vcombine_f32
           - - a
             - FnCall: [vcvtx_f32_f64, [b]]
-            - '[0, 1, 2, 3]'
 
   - name: "vcvt{type[2]}"
     doc: "Floating-point convert to fixed-point, rounding toward zero"

From da1b2021f59190cc0ac914fc60d9a0549d3dad4a Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 04:15:47 +0530
Subject: [PATCH 26/78] Change implementation of `vml{a,s}{l}_lane`

---
 .../core_arch/src/aarch64/neon/generated.rs   |  96 ++--
 .../src/arm_shared/neon/generated.rs          | 432 +++---------------
 .../spec/neon/aarch64.spec.yml                |  52 +--
 .../spec/neon/arm_shared.spec.yml             | 138 +++---
 4 files changed, 181 insertions(+), 537 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 448c4aacdae02..72462dcb604d2 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -12147,11 +12147,9 @@ pub fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
-    unsafe {
-        let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmlal_s8(a, b, c)
-    }
+    let b = vget_high_s8(b);
+    let c = vget_high_s8(c);
+    vmlal_s8(a, b, c)
 }
 #[doc = "Signed multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s16)"]
@@ -12160,11 +12158,9 @@ pub fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        vmlal_s16(a, b, c)
-    }
+    let b = vget_high_s16(b);
+    let c = vget_high_s16(c);
+    vmlal_s16(a, b, c)
 }
 #[doc = "Signed multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s32)"]
@@ -12173,11 +12169,9 @@ pub fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let c: int32x2_t = simd_shuffle!(c, c, [2, 3]);
-        vmlal_s32(a, b, c)
-    }
+    let b = vget_high_s32(b);
+    let c = vget_high_s32(c);
+    vmlal_s32(a, b, c)
 }
 #[doc = "Unsigned multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u8)"]
@@ -12186,11 +12180,9 @@ pub fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmlal_u8(a, b, c)
-    }
+    let b = vget_high_u8(b);
+    let c = vget_high_u8(c);
+    vmlal_u8(a, b, c)
 }
 #[doc = "Unsigned multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u16)"]
@@ -12199,11 +12191,9 @@ pub fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        vmlal_u16(a, b, c)
-    }
+    let b = vget_high_u16(b);
+    let c = vget_high_u16(c);
+    vmlal_u16(a, b, c)
 }
 #[doc = "Unsigned multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u32)"]
@@ -12212,11 +12202,9 @@ pub fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
-        vmlal_u32(a, b, c)
-    }
+    let b = vget_high_u32(b);
+    let c = vget_high_u32(c);
+    vmlal_u32(a, b, c)
 }
 #[doc = "Floating-point multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f64)"]
@@ -12391,11 +12379,9 @@ pub fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
-    unsafe {
-        let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmlsl_s8(a, b, c)
-    }
+    let b = vget_high_s8(b);
+    let c = vget_high_s8(c);
+    vmlsl_s8(a, b, c)
 }
 #[doc = "Signed multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s16)"]
@@ -12404,11 +12390,9 @@ pub fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        vmlsl_s16(a, b, c)
-    }
+    let b = vget_high_s16(b);
+    let c = vget_high_s16(c);
+    vmlsl_s16(a, b, c)
 }
 #[doc = "Signed multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s32)"]
@@ -12417,11 +12401,9 @@ pub fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let c: int32x2_t = simd_shuffle!(c, c, [2, 3]);
-        vmlsl_s32(a, b, c)
-    }
+    let b = vget_high_s32(b);
+    let c = vget_high_s32(c);
+    vmlsl_s32(a, b, c)
 }
 #[doc = "Unsigned multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u8)"]
@@ -12430,11 +12412,9 @@ pub fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmlsl_u8(a, b, c)
-    }
+    let b = vget_high_u8(b);
+    let c = vget_high_u8(c);
+    vmlsl_u8(a, b, c)
 }
 #[doc = "Unsigned multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u16)"]
@@ -12443,11 +12423,9 @@ pub fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        vmlsl_u16(a, b, c)
-    }
+    let b = vget_high_u16(b);
+    let c = vget_high_u16(c);
+    vmlsl_u16(a, b, c)
 }
 #[doc = "Unsigned multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u32)"]
@@ -12456,11 +12434,9 @@ pub fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
-        vmlsl_u32(a, b, c)
-    }
+    let b = vget_high_u32(b);
+    let c = vget_high_u32(c);
+    vmlsl_u32(a, b, c)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s8)"]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 8f9595208465c..134a549daf77c 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -26170,7 +26170,7 @@ pub fn vmla_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmla_f32(a, b, vdup_lane_f32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"]
@@ -26197,7 +26197,7 @@ pub fn vmla_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmla_f32(a, b, vdup_laneq_f32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"]
@@ -26224,13 +26224,7 @@ pub fn vmlaq_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlaq_f32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlaq_f32(a, b, vdupq_lane_f32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"]
@@ -26257,13 +26251,7 @@ pub fn vmlaq_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlaq_f32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlaq_f32(a, b, vdupq_laneq_f32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"]
@@ -26286,13 +26274,7 @@ pub fn vmlaq_laneq_f32<const LANE: i32>(
 )]
 pub fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmla_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmla_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"]
@@ -26315,13 +26297,7 @@ pub fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t)
 )]
 pub fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmla_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmla_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"]
@@ -26344,13 +26320,7 @@ pub fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_
 )]
 pub fn vmla_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmla_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmla_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"]
@@ -26373,13 +26343,7 @@ pub fn vmla_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t)
 )]
 pub fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmla_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmla_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"]
@@ -26402,26 +26366,7 @@ pub fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8
 )]
 pub fn vmlaq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlaq_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlaq_s16(a, b, vdupq_lane_s16::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"]
@@ -26444,26 +26389,7 @@ pub fn vmlaq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t)
 )]
 pub fn vmlaq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlaq_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlaq_u16(a, b, vdupq_lane_u16::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"]
@@ -26486,26 +26412,7 @@ pub fn vmlaq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4
 )]
 pub fn vmlaq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlaq_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlaq_s16(a, b, vdupq_laneq_s16::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"]
@@ -26528,26 +26435,7 @@ pub fn vmlaq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t
 )]
 pub fn vmlaq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlaq_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlaq_u16(a, b, vdupq_laneq_u16::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"]
@@ -26570,7 +26458,7 @@ pub fn vmlaq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x
 )]
 pub fn vmla_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmla_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"]
@@ -26593,7 +26481,7 @@ pub fn vmla_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t)
 )]
 pub fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmla_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"]
@@ -26616,7 +26504,7 @@ pub fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_
 )]
 pub fn vmla_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmla_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"]
@@ -26639,7 +26527,7 @@ pub fn vmla_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t)
 )]
 pub fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmla_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"]
@@ -26662,13 +26550,7 @@ pub fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4
 )]
 pub fn vmlaq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlaq_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlaq_s32(a, b, vdupq_lane_s32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"]
@@ -26691,13 +26573,7 @@ pub fn vmlaq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t)
 )]
 pub fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlaq_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlaq_u32(a, b, vdupq_lane_u32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"]
@@ -26720,13 +26596,7 @@ pub fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2
 )]
 pub fn vmlaq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlaq_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlaq_s32(a, b, vdupq_laneq_s32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"]
@@ -26749,13 +26619,7 @@ pub fn vmlaq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t
 )]
 pub fn vmlaq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlaq_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlaq_u32(a, b, vdupq_laneq_u32::<LANE>(c))
 }
 #[doc = "Vector multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"]
@@ -27240,13 +27104,7 @@ pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
 )]
 pub fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlal_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
 #[doc = "Vector widening multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"]
@@ -27269,13 +27127,7 @@ pub fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
 )]
 pub fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlal_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlal_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
 #[doc = "Vector widening multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"]
@@ -27298,7 +27150,7 @@ pub fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t
 )]
 pub fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmlal_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
 #[doc = "Vector widening multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"]
@@ -27321,7 +27173,7 @@ pub fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t)
 )]
 pub fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmlal_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
 #[doc = "Vector widening multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"]
@@ -27344,13 +27196,7 @@ pub fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t
 )]
 pub fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlal_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
 #[doc = "Vector widening multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"]
@@ -27373,13 +27219,7 @@ pub fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4
 )]
 pub fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlal_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlal_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
 #[doc = "Vector widening multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"]
@@ -27402,7 +27242,7 @@ pub fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x
 )]
 pub fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmlal_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
 #[doc = "Vector widening multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"]
@@ -27425,7 +27265,7 @@ pub fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2
 )]
 pub fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmlal_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
 #[doc = "Vector widening multiply accumulate with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"]
@@ -27704,7 +27544,7 @@ pub fn vmls_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmls_f32(a, b, vdup_lane_f32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"]
@@ -27731,7 +27571,7 @@ pub fn vmls_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmls_f32(a, b, vdup_laneq_f32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"]
@@ -27758,13 +27598,7 @@ pub fn vmlsq_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlsq_f32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsq_f32(a, b, vdupq_lane_f32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"]
@@ -27791,13 +27625,7 @@ pub fn vmlsq_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsq_f32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsq_f32(a, b, vdupq_laneq_f32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"]
@@ -27820,13 +27648,7 @@ pub fn vmlsq_laneq_f32<const LANE: i32>(
 )]
 pub fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmls_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmls_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"]
@@ -27849,13 +27671,7 @@ pub fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t)
 )]
 pub fn vmls_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmls_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmls_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"]
@@ -27878,13 +27694,7 @@ pub fn vmls_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_
 )]
 pub fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmls_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmls_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"]
@@ -27907,13 +27717,7 @@ pub fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t)
 )]
 pub fn vmls_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmls_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmls_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"]
@@ -27936,26 +27740,7 @@ pub fn vmls_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8
 )]
 pub fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsq_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlsq_s16(a, b, vdupq_lane_s16::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"]
@@ -27978,26 +27763,7 @@ pub fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t)
 )]
 pub fn vmlsq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsq_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlsq_u16(a, b, vdupq_lane_u16::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"]
@@ -28020,26 +27786,7 @@ pub fn vmlsq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4
 )]
 pub fn vmlsq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsq_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlsq_s16(a, b, vdupq_laneq_s16::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"]
@@ -28062,26 +27809,7 @@ pub fn vmlsq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t
 )]
 pub fn vmlsq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsq_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlsq_u16(a, b, vdupq_laneq_u16::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"]
@@ -28104,7 +27832,7 @@ pub fn vmlsq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x
 )]
 pub fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmls_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"]
@@ -28127,7 +27855,7 @@ pub fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t)
 )]
 pub fn vmls_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmls_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"]
@@ -28150,7 +27878,7 @@ pub fn vmls_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_
 )]
 pub fn vmls_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmls_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"]
@@ -28173,7 +27901,7 @@ pub fn vmls_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t)
 )]
 pub fn vmls_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmls_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"]
@@ -28196,13 +27924,7 @@ pub fn vmls_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4
 )]
 pub fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlsq_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsq_s32(a, b, vdupq_lane_s32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"]
@@ -28225,13 +27947,7 @@ pub fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t)
 )]
 pub fn vmlsq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlsq_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsq_u32(a, b, vdupq_lane_u32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"]
@@ -28254,13 +27970,7 @@ pub fn vmlsq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2
 )]
 pub fn vmlsq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsq_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsq_s32(a, b, vdupq_laneq_s32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"]
@@ -28283,13 +27993,7 @@ pub fn vmlsq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t
 )]
 pub fn vmlsq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsq_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsq_u32(a, b, vdupq_laneq_u32::<LANE>(c))
 }
 #[doc = "Vector multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"]
@@ -28774,13 +28478,7 @@ pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
 )]
 pub fn vmlsl_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsl_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
 #[doc = "Vector widening multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"]
@@ -28803,13 +28501,7 @@ pub fn vmlsl_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
 )]
 pub fn vmlsl_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsl_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsl_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
 #[doc = "Vector widening multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"]
@@ -28832,7 +28524,7 @@ pub fn vmlsl_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t
 )]
 pub fn vmlsl_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmlsl_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
 #[doc = "Vector widening multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"]
@@ -28855,7 +28547,7 @@ pub fn vmlsl_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t)
 )]
 pub fn vmlsl_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmlsl_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
 #[doc = "Vector widening multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"]
@@ -28878,13 +28570,7 @@ pub fn vmlsl_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t
 )]
 pub fn vmlsl_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsl_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
 #[doc = "Vector widening multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"]
@@ -28907,13 +28593,7 @@ pub fn vmlsl_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4
 )]
 pub fn vmlsl_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsl_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsl_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
 #[doc = "Vector widening multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"]
@@ -28936,7 +28616,7 @@ pub fn vmlsl_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x
 )]
 pub fn vmlsl_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmlsl_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
 #[doc = "Vector widening multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"]
@@ -28959,7 +28639,7 @@ pub fn vmlsl_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2
 )]
 pub fn vmlsl_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+    vmlsl_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
 #[doc = "Vector widening multiply subtract with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index ac30a651273a2..37f28ca129dbe 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -2600,12 +2600,12 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]']
-      - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]', '[2, 3]']
+      - [int16x8_t, int8x16_t, int8x8_t]
+      - [int32x4_t, int16x8_t, int16x4_t]
+      - [int64x2_t, int32x4_t, int32x2_t]
     compose:
-      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
-      - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
+      - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
       - FnCall: ["vmlal_{neon_type[2]}", [a, b, c]]
 
   - name: "vmlal_high_{neon_type[1]}"
@@ -2617,18 +2617,12 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]']
+      - [uint16x8_t, uint8x16_t, uint8x8_t]
+      - [uint32x4_t, uint16x8_t, uint16x4_t]
+      - [uint64x2_t, uint32x4_t, uint32x2_t]
     compose:
-      - Let:
-          - b
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
+      - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
       - FnCall: ["vmlal_{neon_type[1]}", [a, b, c]]
 
   - name: "vmlsl_high_{neon_type[1]}"
@@ -2640,18 +2634,12 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]']
-      - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]']
+      - [int16x8_t, int8x16_t, int8x8_t]
+      - [int32x4_t, int16x8_t, int16x4_t]
+      - [int64x2_t, int32x4_t, int32x2_t]
     compose:
-      - Let:
-          - b
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
+      - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
       - FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]]
 
   - name: "vmlsl_high_{neon_type[1]}"
@@ -2663,12 +2651,12 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]']
+      - [uint16x8_t, uint8x16_t, uint8x8_t]
+      - [uint32x4_t, uint16x8_t, uint16x4_t]
+      - [uint64x2_t, uint32x4_t, uint32x2_t]
     compose:
-      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
-      - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
+      - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
       - FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]]
 
   - name: "vmovn_high{neon_type[1].noq}"
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 3ddd8db3b6d4d..a56c6079dc9ab 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -1992,17 +1992,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
+      - [int32x4_t, int16x4_t, int16x4_t, '2']
+      - [int32x4_t, int16x4_t, int16x8_t, '3']
+      - [int64x2_t, int32x2_t, int32x2_t, '1']
+      - [int64x2_t, int32x2_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmlal_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
+            - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlal_lane{neon_type[2].no}"
     doc: "Vector widening multiply accumulate with scalar"
@@ -2018,17 +2018,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint32x4_t, uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint16x4_t, uint16x4_t, '2']
+      - [uint32x4_t, uint16x4_t, uint16x8_t, '3']
+      - [uint64x2_t, uint32x2_t, uint32x2_t, '1']
+      - [uint64x2_t, uint32x2_t, uint32x4_t, '2']
     compose:
-      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]]
+      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmlal_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, '{type[5]}']]
+            - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlal_{neon_type[1]}"
     doc: "Unsigned multiply-add long"
@@ -2147,15 +2147,15 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int32x4_t, int16x4_t, int16x4_t, '2']
+      - [int32x4_t, int16x4_t, int16x8_t, '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmlsl_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlsl_lane{neon_type[2].no}"
     doc: "Vector widening multiply subtract with scalar"
@@ -2171,15 +2171,15 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int64x2_t, int32x2_t, int32x2_t, '[LANE as u32, LANE as u32]', '1']
-      - [int64x2_t, int32x2_t, int32x4_t, '[LANE as u32, LANE as u32]', '2']
+      - [int64x2_t, int32x2_t, int32x2_t, '1']
+      - [int64x2_t, int32x2_t, int32x4_t, '2']
     compose:
-      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]]
+      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmlsl_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
+            - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlsl_lane{neon_type[2].no}"
     doc: "Vector widening multiply subtract with scalar"
@@ -2195,17 +2195,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint32x4_t, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint16x4_t, uint16x4_t, '2']
+      - [uint32x4_t, uint16x4_t, uint16x8_t, '3']
+      - [uint64x2_t, uint32x2_t, uint32x2_t, '1']
+      - [uint64x2_t, uint32x2_t, uint32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmlsl_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlsl_{neon_type[1]}"
     doc: "Unsigned multiply-subtract long"
@@ -10901,21 +10901,21 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, '2']
+      - [_laneq_s16, int16x4_t, int16x8_t, '3']
+      - [q_lane_s16, int16x8_t, int16x4_t, '2']
+      - [q_laneq_s16, int16x8_t, int16x8_t, '3']
+      - [_lane_u16, uint16x4_t, uint16x4_t, '2']
+      - [_laneq_u16, uint16x4_t, uint16x8_t, '3']
+      - [q_lane_u16, uint16x8_t, uint16x4_t, '2']
+      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmla{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmla{type[0]}"
     doc: "Vector multiply accumulate with scalar"
@@ -10931,21 +10931,21 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s32, int32x2_t, int32x2_t, '1']
+      - [_laneq_s32, int32x2_t, int32x4_t, '2']
+      - [q_lane_s32, int32x4_t, int32x2_t, '1']
+      - [q_laneq_s32, int32x4_t, int32x4_t, '2']
+      - [_lane_u32, uint32x2_t, uint32x2_t, '1']
+      - [_laneq_u32, uint32x2_t, uint32x4_t, '2']
+      - [q_lane_u32, uint32x4_t, uint32x2_t, '1']
+      - [q_laneq_u32, uint32x4_t, uint32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmla{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmla{type[0]}"
     doc: "Vector multiply accumulate with scalar"
@@ -10961,17 +10961,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_f32, float32x2_t, float32x2_t, '1']
+      - [_laneq_f32, float32x2_t, float32x4_t, '2']
+      - [q_lane_f32, float32x4_t, float32x2_t, '1']
+      - [q_laneq_f32, float32x4_t, float32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmla{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmls{neon_type[0].N}"
     doc: "Vector multiply subtract with scalar"
@@ -11050,21 +11050,21 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, '2']
+      - [_laneq_s16, int16x4_t, int16x8_t, '3']
+      - [q_lane_s16, int16x8_t, int16x4_t, '2']
+      - [q_laneq_s16, int16x8_t, int16x8_t, '3']
+      - [_lane_u16, uint16x4_t, uint16x4_t, '2']
+      - [_laneq_u16, uint16x4_t, uint16x8_t, '3']
+      - [q_lane_u16, uint16x8_t, uint16x4_t, '2']
+      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmls{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmls{type[0]}"
     doc: "Vector multiply subtract with scalar"
@@ -11080,21 +11080,21 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s32, int32x2_t, int32x2_t, '1']
+      - [_laneq_s32, int32x2_t, int32x4_t, '2']
+      - [q_lane_s32, int32x4_t, int32x2_t, '1']
+      - [q_laneq_s32, int32x4_t, int32x4_t, '2']
+      - [_lane_u32, uint32x2_t, uint32x2_t, '1']
+      - [_laneq_u32, uint32x2_t, uint32x4_t, '2']
+      - [q_lane_u32, uint32x4_t, uint32x2_t, '1']
+      - [q_laneq_u32, uint32x4_t, uint32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmls{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmls{type[0]}"
     doc: "Vector multiply subtract with scalar"
@@ -11110,17 +11110,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_f32, float32x2_t, float32x2_t, '1']
+      - [_laneq_f32, float32x2_t, float32x4_t, '2']
+      - [q_lane_f32, float32x4_t, float32x2_t, '1']
+      - [q_laneq_f32, float32x4_t, float32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmls{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmul{neon_type[0].N}"
     doc: "Vector multiply by scalar"

From ccb273a3c5086e68137f2813925ec72653de3df9 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 04:23:22 +0530
Subject: [PATCH 27/78] Change implementation of `vmov{n,l}_high`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 66 +++++--------------
 .../spec/neon/aarch64.spec.yml                | 41 +++++-------
 2 files changed, 34 insertions(+), 73 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 72462dcb604d2..fc6747b24db70 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -12445,10 +12445,8 @@ pub fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))]
 pub fn vmovl_high_s8(a: int8x16_t) -> int16x8_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmovl_s8(a)
-    }
+    let a = vget_high_s8(a);
+    vmovl_s8(a)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s16)"]
@@ -12457,10 +12455,8 @@ pub fn vmovl_high_s8(a: int8x16_t) -> int16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))]
 pub fn vmovl_high_s16(a: int16x8_t) -> int32x4_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        vmovl_s16(a)
-    }
+    let a = vget_high_s16(a);
+    vmovl_s16(a)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s32)"]
@@ -12469,10 +12465,8 @@ pub fn vmovl_high_s16(a: int16x8_t) -> int32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))]
 pub fn vmovl_high_s32(a: int32x4_t) -> int64x2_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        vmovl_s32(a)
-    }
+    let a = vget_high_s32(a);
+    vmovl_s32(a)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u8)"]
@@ -12481,10 +12475,8 @@ pub fn vmovl_high_s32(a: int32x4_t) -> int64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))]
 pub fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmovl_u8(a)
-    }
+    let a = vget_high_u8(a);
+    vmovl_u8(a)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u16)"]
@@ -12493,10 +12485,8 @@ pub fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))]
 pub fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        vmovl_u16(a)
-    }
+    let a = vget_high_u16(a);
+    vmovl_u16(a)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u32)"]
@@ -12505,10 +12495,8 @@ pub fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))]
 pub fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        vmovl_u32(a)
-    }
+    let a = vget_high_u32(a);
+    vmovl_u32(a)
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s16)"]
@@ -12517,10 +12505,7 @@ pub fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    unsafe {
-        let c: int8x8_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
-    }
+    unsafe { vcombine_s8(a, simd_cast(b)) }
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s32)"]
@@ -12529,10 +12514,7 @@ pub fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    unsafe {
-        let c: int16x4_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
-    }
+    unsafe { vcombine_s16(a, simd_cast(b)) }
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s64)"]
@@ -12541,10 +12523,7 @@ pub fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    unsafe {
-        let c: int32x2_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3])
-    }
+    unsafe { vcombine_s32(a, simd_cast(b)) }
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u16)"]
@@ -12553,10 +12532,7 @@ pub fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    unsafe {
-        let c: uint8x8_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
-    }
+    unsafe { vcombine_u8(a, simd_cast(b)) }
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u32)"]
@@ -12565,10 +12541,7 @@ pub fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    unsafe {
-        let c: uint16x4_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
-    }
+    unsafe { vcombine_u16(a, simd_cast(b)) }
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u64)"]
@@ -12577,10 +12550,7 @@ pub fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    unsafe {
-        let c: uint32x2_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3])
-    }
+    unsafe { vcombine_u32(a, simd_cast(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f64)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 37f28ca129dbe..074e2af49220c 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -2668,24 +2668,17 @@ intrinsics:
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [xtn2]]}]]
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int8x16_t]
+      - [int16x4_t, int32x4_t, int16x8_t]
+      - [int32x2_t, int64x2_t, int32x4_t]
+      - [uint8x8_t, uint16x8_t, uint8x16_t]
+      - [uint16x4_t, uint32x4_t, uint16x8_t]
+      - [uint32x2_t, uint64x2_t, uint32x4_t]
     compose:
-      - Let:
-          - c
-          - "{neon_type[0]}"
           - FnCall:
-              - simd_cast
-              - - b
-      - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
-            - c
-            - "{type[3]}"
+            - FnCall: ['simd_cast', [b]]
 
   - name: "vneg{neon_type.no}"
     doc: Negate
@@ -6788,14 +6781,13 @@ intrinsics:
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sxtl2]]}]]
     safety: safe
     types:
-      - [int8x16_t, int16x8_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]']
-      - [int32x4_t, int64x2_t, int32x2_t, '[2, 3]']
+      - [int8x16_t, int16x8_t]
+      - [int16x8_t, int32x4_t]
+      - [int32x4_t, int64x2_t]
     compose:
       - Let:
           - a
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]
+          - FnCall: ['vget_high_{neon_type[0]}', [a]]
       - FnCall: ["vmovl{neon_type[0].noq}", [a]]
 
   - name: "vmovl_high{neon_type[0].noq}"
@@ -6807,14 +6799,13 @@ intrinsics:
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uxtl2]]}]]
     safety: safe
     types:
-      - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]']
-      - [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]']
+      - [uint8x16_t, uint16x8_t]
+      - [uint16x8_t, uint32x4_t]
+      - [uint32x4_t, uint64x2_t]
     compose:
       - Let:
           - a
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]
+          - FnCall: ['vget_high_{neon_type[0]}', [a]]
       - FnCall: ["vmovl{neon_type[0].noq}", [a]]
 
   - name: "vpadd{neon_type[0].no}"

From c02a41ccaacda0ee5243a576f05d6a32e3a106d8 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 04:23:51 +0530
Subject: [PATCH 28/78] Change implementation of `vmull_{high,lane}`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 56 +++++++------------
 .../src/arm_shared/neon/generated.rs          | 36 +++---------
 .../spec/neon/aarch64.spec.yml                | 52 ++++++-----------
 .../spec/neon/arm_shared.spec.yml             | 20 +++----
 4 files changed, 56 insertions(+), 108 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index fc6747b24db70..7f53a0c628ca9 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -12822,11 +12822,9 @@ pub fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(pmull2))]
 pub fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let b: poly8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmull_p8(a, b)
-    }
+    let a = vget_high_p8(a);
+    let b = vget_high_p8(b);
+    vmull_p8(a, b)
 }
 #[doc = "Signed multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s8)"]
@@ -12835,11 +12833,9 @@ pub fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 pub fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmull_s8(a, b)
-    }
+    let a = vget_high_s8(a);
+    let b = vget_high_s8(b);
+    vmull_s8(a, b)
 }
 #[doc = "Signed multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s16)"]
@@ -12848,11 +12844,9 @@ pub fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 pub fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        vmull_s16(a, b)
-    }
+    let a = vget_high_s16(a);
+    let b = vget_high_s16(b);
+    vmull_s16(a, b)
 }
 #[doc = "Signed multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s32)"]
@@ -12861,11 +12855,9 @@ pub fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 pub fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        vmull_s32(a, b)
-    }
+    let a = vget_high_s32(a);
+    let b = vget_high_s32(b);
+    vmull_s32(a, b)
 }
 #[doc = "Unsigned multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u8)"]
@@ -12874,11 +12866,9 @@ pub fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 pub fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmull_u8(a, b)
-    }
+    let a = vget_high_u8(a);
+    let b = vget_high_u8(b);
+    vmull_u8(a, b)
 }
 #[doc = "Unsigned multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u16)"]
@@ -12887,11 +12877,9 @@ pub fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 pub fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        vmull_u16(a, b)
-    }
+    let a = vget_high_u16(a);
+    let b = vget_high_u16(b);
+    vmull_u16(a, b)
 }
 #[doc = "Unsigned multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u32)"]
@@ -12900,11 +12888,9 @@ pub fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 pub fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        vmull_u32(a, b)
-    }
+    let a = vget_high_u32(a);
+    let b = vget_high_u32(b);
+    vmull_u32(a, b)
 }
 #[doc = "Polynomial multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p64)"]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 134a549daf77c..111397752b498 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -30965,12 +30965,7 @@ pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 )]
 pub fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_s16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_s16(a, vdup_lane_s16::<LANE>(b))
 }
 #[doc = "Vector long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"]
@@ -30993,12 +30988,7 @@ pub fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t
 )]
 pub fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmull_s16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_s16(a, vdup_laneq_s16::<LANE>(b))
 }
 #[doc = "Vector long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"]
@@ -31021,7 +31011,7 @@ pub fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t
 )]
 pub fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    vmull_s32(a, vdup_lane_s32::<LANE>(b))
 }
 #[doc = "Vector long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"]
@@ -31044,7 +31034,7 @@ pub fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t
 )]
 pub fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    vmull_s32(a, vdup_laneq_s32::<LANE>(b))
 }
 #[doc = "Vector long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"]
@@ -31067,12 +31057,7 @@ pub fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t
 )]
 pub fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_u16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_u16(a, vdup_lane_u16::<LANE>(b))
 }
 #[doc = "Vector long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"]
@@ -31095,12 +31080,7 @@ pub fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4
 )]
 pub fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmull_u16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_u16(a, vdup_laneq_u16::<LANE>(b))
 }
 #[doc = "Vector long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"]
@@ -31123,7 +31103,7 @@ pub fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x
 )]
 pub fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    vmull_u32(a, vdup_lane_u32::<LANE>(b))
 }
 #[doc = "Vector long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"]
@@ -31146,7 +31126,7 @@ pub fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2
 )]
 pub fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    vmull_u32(a, vdup_laneq_u32::<LANE>(b))
 }
 #[doc = "Vector long multiply with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 074e2af49220c..35116dd3a0d01 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -2675,7 +2675,7 @@ intrinsics:
       - [uint16x4_t, uint32x4_t, uint16x8_t]
       - [uint32x2_t, uint64x2_t, uint32x4_t]
     compose:
-          - FnCall:
+      - FnCall:
           - 'vcombine_{neon_type[0]}'
           - - a
             - FnCall: ['simd_cast', [b]]
@@ -5035,47 +5035,35 @@ intrinsics:
   - name: "vmull_high{neon_type[0].noq}"
     doc: Signed multiply long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[3]}"
+    return_type: "{neon_type[1]}"
     attr:
       - *neon-stable
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2]]}]]
     safety: safe
     types:
-      - [int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int16x8_t]
-      - [int16x8_t, int16x4_t, '[4, 5, 6, 7]', int32x4_t]
-      - [int32x4_t, int32x2_t, '[2, 3]', int64x2_t]
+      - [int8x16_t, int16x8_t]
+      - [int16x8_t, int32x4_t]
+      - [int32x4_t, int64x2_t]
     compose:
-      - Let:
-          - a
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
-      - Let:
-          - b
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
       - FnCall: ["vmull_{neon_type[0]}", [a, b]]
 
   - name: "vmull_high{neon_type[0].noq}"
     doc: "Unsigned multiply long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[3]}"
+    return_type: "{neon_type[1]}"
     attr:
       - *neon-stable
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2]]}]]
     safety: safe
     types:
-      - [uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint16x8_t]
-      - [uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', uint32x4_t]
-      - [uint32x4_t, uint32x2_t, '[2, 3]', uint64x2_t]
+      - [uint8x16_t, uint16x8_t]
+      - [uint16x8_t, uint32x4_t]
+      - [uint32x4_t, uint64x2_t]
     compose:
-      - Let:
-          - a
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
-      - Let:
-          - b
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
       - FnCall: ["vmull_{neon_type[0]}", [a, b]]
 
   - name: "vmull_p64"
@@ -5101,22 +5089,16 @@ intrinsics:
   - name: "vmull_high{neon_type[0].noq}"
     doc: "Polynomial multiply long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[3]}"
+    return_type: "{neon_type[1]}"
     attr:
       - *neon-stable
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [pmull2]]}]]
     safety: safe
     types:
-      - [poly8x16_t, poly8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', poly16x8_t]
+      - [poly8x16_t, poly16x8_t]
     compose:
-      - Let:
-          - a
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
-      - Let:
-          - b
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
       - FnCall: ["vmull_{neon_type[0]}", [a, b]]
 
   - name: "vmull_high{neon_type[0].noq}"
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index a56c6079dc9ab..47ec2d0956939 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -11357,16 +11357,16 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int16x4_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int16x4_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x2_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [int32x2_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32]']
+      - [int16x4_t, int16x4_t, int32x4_t, '2']
+      - [int16x4_t, int16x8_t, int32x4_t, '3']
+      - [int32x2_t, int32x2_t, int64x2_t, '1']
+      - [int32x2_t, int32x4_t, int64x2_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmull_{neon_type[0]}"
           - - a
-            - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]]
+            - FnCall: ['vdup_lane{neon_type[1].nox}', [b], [LANE]]
 
   - name: "vmull_lane{neon_type[1].no}"
     doc: "Vector long multiply by scalar"
@@ -11382,16 +11382,16 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]']
+      - [uint16x4_t, uint16x4_t, uint32x4_t, '2']
+      - [uint16x4_t, uint16x8_t, uint32x4_t, '3']
+      - [uint32x2_t, uint32x2_t, uint64x2_t, '1']
+      - [uint32x2_t, uint32x4_t, uint64x2_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmull_{neon_type[0]}"
           - - a
-            - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]]
+            - FnCall: ['vdup_lane{neon_type[1].nox}', [b], [LANE]]
 
   - name: "vfms{neon_type[0].N}"
     doc: "Floating-point fused Multiply-subtract to accumulator(vector)"

From 401582dd27e90a0cbe7fc92fb0d10162877951af Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 04:31:21 +0530
Subject: [PATCH 29/78] Change implementation of `vmulx_lane`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 20 +++++------
 .../spec/neon/aarch64.spec.yml                | 36 +++++++------------
 2 files changed, 22 insertions(+), 34 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 7f53a0c628ca9..b7db2f0c9a3fb 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -13074,7 +13074,7 @@ pub fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulx_lane_f16<const LANE: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmulx_f16(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
+    vmulx_f16(a, vdup_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f16)"]
@@ -13086,7 +13086,7 @@ pub fn vmulx_lane_f16<const LANE: i32>(a: float16x4_t, b: float16x4_t) -> float1
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulx_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vmulx_f16(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
+    vmulx_f16(a, vdup_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f16)"]
@@ -13098,7 +13098,7 @@ pub fn vmulx_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxq_lane_f16<const LANE: i32>(a: float16x8_t, b: float16x4_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmulxq_f16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
+    vmulxq_f16(a, vdupq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f16)"]
@@ -13110,7 +13110,7 @@ pub fn vmulxq_lane_f16<const LANE: i32>(a: float16x8_t, b: float16x4_t) -> float
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vmulxq_f16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
+    vmulxq_f16(a, vdupq_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32)"]
@@ -13121,7 +13121,7 @@ pub fn vmulxq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> floa
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
+    vmulx_f32(a, vdup_lane_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f32)"]
@@ -13132,7 +13132,7 @@ pub fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float3
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
+    vmulx_f32(a, vdup_laneq_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f32)"]
@@ -13143,7 +13143,7 @@ pub fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulxq_f32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
+    vmulxq_f32(a, vdupq_lane_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f32)"]
@@ -13154,7 +13154,7 @@ pub fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmulxq_f32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
+    vmulxq_f32(a, vdupq_laneq_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f64)"]
@@ -13165,7 +13165,7 @@ pub fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> floa
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
+    vmulxq_f64(a, vdupq_laneq_f64::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f64)"]
@@ -13335,7 +13335,7 @@ pub fn vmulxh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
     static_assert!(LANE == 0);
-    unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
+    vmulxq_f64(a, vdupq_lane_f64::<LANE>(b))
 }
 #[doc = "Negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f64)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 35116dd3a0d01..ba248f4f0715b 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -5235,11 +5235,7 @@ intrinsics:
       - FnCall:
           - "vmulx{type[3]}"
           - - a
-            - FnCall:
-                - "simd_shuffle!"
-                - - b
-                  - b
-                  - "{type[4]}"
+            - FnCall: ['vdup{type[0]}', [b], [LANE]]
 
   - name: "vmulx{type[0]}"
     doc: Floating-point multiply extended
@@ -5292,21 +5288,17 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32', '[LANE as u32; 2]']
-      - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32', '[LANE as u32; 2]']
-      - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32', '[LANE as u32; 4]']
-      - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32', '[LANE as u32; 4]']
-      - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64', '[LANE as u32; 2]']
+      - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32']
+      - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32']
+      - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32']
+      - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32']
+      - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64']
     compose:
       - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
       - FnCall:
           - "vmulx{type[4]}"
           - - a
-            - FnCall:
-                - "simd_shuffle!"
-                - - b
-                  - b
-                  - "{type[5]}"
+            - FnCall: ['vdup{type[0]}', [b], [LANE]]
 
 
   - name: "vmulx{type[0]}"
@@ -5322,20 +5314,16 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32; 4]']
-      - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32; 4]']
-      - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32; 8]']
-      - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32; 8]']
+      - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16']
+      - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16']
+      - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16']
+      - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16']
     compose:
       - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
       - FnCall:
           - "vmulx{type[4]}"
           - - a
-            - FnCall:
-                - "simd_shuffle!"
-                - - b
-                  - b
-                  - "{type[5]}"
+            - FnCall: ['vdup{type[0]}', [b], [LANE]]
 
 
   - name: "vmulx{type[0]}"

From 63d8cef02fd65eb54b6e82d0a38f49462f4e6a4f Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 04:39:21 +0530
Subject: [PATCH 30/78] Change implementation of `v{add,sub}{l,w}_high`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 84 +++++++------------
 .../src/arm_shared/neon/generated.rs          | 36 ++++----
 .../spec/neon/aarch64.spec.yml                | 62 +++++---------
 .../spec/neon/arm_shared.spec.yml             | 31 ++++---
 4 files changed, 85 insertions(+), 128 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index b7db2f0c9a3fb..eb590b3af3e23 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -23299,11 +23299,9 @@ pub fn vsubh_f16(a: f16, b: f16) -> f16 {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
 pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
     unsafe {
-        let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let d: int16x8_t = simd_cast(c);
-        let e: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let f: int16x8_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: int16x8_t = simd_cast(vget_high_s8(a));
+        let d: int16x8_t = simd_cast(vget_high_s8(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Signed Subtract Long"]
@@ -23314,11 +23312,9 @@ pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
 pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     unsafe {
-        let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let d: int32x4_t = simd_cast(c);
-        let e: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let f: int32x4_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: int32x4_t = simd_cast(vget_high_s16(a));
+        let d: int32x4_t = simd_cast(vget_high_s16(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Signed Subtract Long"]
@@ -23329,11 +23325,9 @@ pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
 pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     unsafe {
-        let c: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let d: int64x2_t = simd_cast(c);
-        let e: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let f: int64x2_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: int64x2_t = simd_cast(vget_high_s32(a));
+        let d: int64x2_t = simd_cast(vget_high_s32(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Unsigned Subtract Long"]
@@ -23344,11 +23338,9 @@ pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
 pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
     unsafe {
-        let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let d: uint16x8_t = simd_cast(c);
-        let e: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let f: uint16x8_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: uint16x8_t = simd_cast(vget_high_u8(a));
+        let d: uint16x8_t = simd_cast(vget_high_u8(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Unsigned Subtract Long"]
@@ -23359,11 +23351,9 @@ pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
 pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     unsafe {
-        let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let d: uint32x4_t = simd_cast(c);
-        let e: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let f: uint32x4_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: uint32x4_t = simd_cast(vget_high_u16(a));
+        let d: uint32x4_t = simd_cast(vget_high_u16(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Unsigned Subtract Long"]
@@ -23374,11 +23364,9 @@ pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
 pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     unsafe {
-        let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let d: uint64x2_t = simd_cast(c);
-        let e: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let f: uint64x2_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: uint64x2_t = simd_cast(vget_high_u32(a));
+        let d: uint64x2_t = simd_cast(vget_high_u32(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Signed Subtract Wide"]
@@ -23388,10 +23376,8 @@ pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
 pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
-    unsafe {
-        let c: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        simd_sub(a, simd_cast(c))
-    }
+    let c = vget_high_s8(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Signed Subtract Wide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"]
@@ -23400,10 +23386,8 @@ pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
 pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
-    unsafe {
-        let c: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        simd_sub(a, simd_cast(c))
-    }
+    let c = vget_high_s16(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Signed Subtract Wide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"]
@@ -23412,10 +23396,8 @@ pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
 pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        simd_sub(a, simd_cast(c))
-    }
+    let c = vget_high_s32(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Unsigned Subtract Wide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"]
@@ -23424,10 +23406,8 @@ pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
 pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let c: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        simd_sub(a, simd_cast(c))
-    }
+    let c = vget_high_u8(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Unsigned Subtract Wide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"]
@@ -23436,10 +23416,8 @@ pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
 pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let c: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        simd_sub(a, simd_cast(c))
-    }
+    let c = vget_high_u16(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Unsigned Subtract Wide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"]
@@ -23448,10 +23426,8 @@ pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
 pub fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let c: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        simd_sub(a, simd_cast(c))
-    }
+    let c = vget_high_u32(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 111397752b498..6d54e84f25089 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -2499,9 +2499,9 @@ pub fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    let a: int16x4_t = vget_high_s16(a);
+    let b: int16x4_t = vget_high_s16(b);
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
         let a: int32x4_t = simd_cast(a);
         let b: int32x4_t = simd_cast(b);
         simd_add(a, b)
@@ -2530,9 +2530,9 @@ pub fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    let a: int32x2_t = vget_high_s32(a);
+    let b: int32x2_t = vget_high_s32(b);
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
         let a: int64x2_t = simd_cast(a);
         let b: int64x2_t = simd_cast(b);
         simd_add(a, b)
@@ -2561,9 +2561,9 @@ pub fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+    let a: int8x8_t = vget_high_s8(a);
+    let b: int8x8_t = vget_high_s8(b);
     unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
         let a: int16x8_t = simd_cast(a);
         let b: int16x8_t = simd_cast(b);
         simd_add(a, b)
@@ -2592,9 +2592,9 @@ pub fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+    let a: uint16x4_t = vget_high_u16(a);
+    let b: uint16x4_t = vget_high_u16(b);
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
         let a: uint32x4_t = simd_cast(a);
         let b: uint32x4_t = simd_cast(b);
         simd_add(a, b)
@@ -2623,9 +2623,9 @@ pub fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+    let a: uint32x2_t = vget_high_u32(a);
+    let b: uint32x2_t = vget_high_u32(b);
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
         let a: uint64x2_t = simd_cast(a);
         let b: uint64x2_t = simd_cast(b);
         simd_add(a, b)
@@ -2654,9 +2654,9 @@ pub fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
+    let a: uint8x8_t = vget_high_u8(a);
+    let b: uint8x8_t = vget_high_u8(b);
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
         let a: uint16x8_t = simd_cast(a);
         let b: uint16x8_t = simd_cast(b);
         simd_add(a, b)
@@ -2856,8 +2856,8 @@ pub fn vaddq_p128(a: p128, b: p128) -> p128 {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
+    let b = vget_high_s16(b);
     unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
         let b: int32x4_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -2885,8 +2885,8 @@ pub fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
+    let b = vget_high_s32(b);
     unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
         let b: int64x2_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -2914,8 +2914,8 @@ pub fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
+    let b = vget_high_s8(b);
     unsafe {
-        let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
         let b: int16x8_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -2943,8 +2943,8 @@ pub fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
+    let b = vget_high_u16(b);
     unsafe {
-        let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
         let b: uint32x4_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -2972,8 +2972,8 @@ pub fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
+    let b = vget_high_u32(b);
     unsafe {
-        let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
         let b: uint64x2_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -3001,8 +3001,8 @@ pub fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
+    let b = vget_high_u8(b);
     unsafe {
-        let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
         let b: uint16x8_t = simd_cast(b);
         simd_add(a, b)
     }
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index ba248f4f0715b..a6208fac7c350 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -5667,14 +5667,13 @@ intrinsics:
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ssubw2]]}]]
     safety: safe
     types:
-      - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]']
-      - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]']
+      - [int16x8_t, int8x16_t]
+      - [int32x4_t, int16x8_t]
+      - [int64x2_t, int32x4_t]
     compose:
       - Let:
           - c
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
+          - FnCall: ['vget_high_{neon_type[1]}', [b]]
       - FnCall:
           - simd_sub
           - - a
@@ -5689,14 +5688,13 @@ intrinsics:
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [usubw2]]}]]
     safety: safe
     types:
-      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]']
+      - [uint16x8_t, uint8x16_t]
+      - [uint32x4_t, uint16x8_t]
+      - [uint64x2_t, uint32x4_t]
     compose:
       - Let:
           - c
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
+          - FnCall: ['vget_high_{neon_type[1]}', [b]]
       - FnCall:
           - simd_sub
           - - a
@@ -5711,27 +5709,19 @@ intrinsics:
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ssubl2]]}]]
     safety: safe
     types:
-      - [int8x16_t, int16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t]
-      - [int16x8_t, int32x4_t, '[4, 5, 6, 7]', int16x4_t]
-      - [int32x4_t, int64x2_t, '[2, 3]', int32x2_t]
+      - [int8x16_t, int16x8_t]
+      - [int32x4_t, int64x2_t]
+      - [int16x8_t, int32x4_t]
     compose:
       - Let:
           - c
-          - "{neon_type[3]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
-      - Let:
-          - d
           - "{neon_type[1]}"
-          - FnCall: [simd_cast, [c]]
-      - Let:
-          - e
-          - "{neon_type[3]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
+          - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]]
       - Let:
-          - f
+          - d
           - "{neon_type[1]}"
-          - FnCall: [simd_cast, [e]]
-      - FnCall: [simd_sub, [d, f]]
+          - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [b]]}]]
+      - FnCall: [simd_sub, [c, d]]
 
   - name: "vsubl_high{neon_type[0].noq}"
     doc: "Unsigned Subtract Long"
@@ -5742,27 +5732,19 @@ intrinsics:
       - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [usubl2]]}]]
     safety: safe
     types:
-      - [uint8x16_t, uint16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint8x8_t]
-      - [uint16x8_t, uint32x4_t, '[4, 5, 6, 7]', uint16x4_t]
-      - [uint32x4_t, uint64x2_t, '[2, 3]', uint32x2_t]
+      - [uint8x16_t, uint16x8_t]
+      - [uint16x8_t, uint32x4_t]
+      - [uint32x4_t, uint64x2_t]
     compose:
       - Let:
           - c
-          - "{neon_type[3]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
-      - Let:
-          - d
           - "{neon_type[1]}"
-          - FnCall: [simd_cast, [c]]
-      - Let:
-          - e
-          - "{neon_type[3]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
+          - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]]
       - Let:
-          - f
+          - d
           - "{neon_type[1]}"
-          - FnCall: [simd_cast, [e]]
-      - FnCall: [simd_sub, [d, f]]
+          - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [b]]}]]
+      - FnCall: [simd_sub, [c, d]]
 
   - name: "vbcax{neon_type.no}"
     doc: Bit clear and exclusive OR
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 47ec2d0956939..4024e624ca9ba 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -14478,21 +14478,21 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - ['vaddl_high_s8', 'int8x16_t', 'int16x8_t', 'vaddl', 'saddl2',  'int8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddl_high_s16', 'int16x8_t', 'int32x4_t', 'vaddl', 'saddl2',  'int16x4_t', '[4, 5, 6, 7]']
-      - ['vaddl_high_s32', 'int32x4_t', 'int64x2_t', 'vaddl', 'saddl2',  'int32x2_t', '[2, 3]']
-      - ['vaddl_high_u8', 'uint8x16_t', 'uint16x8_t', 'vaddl', 'uaddl2',  'uint8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddl_high_u16', 'uint16x8_t', 'uint32x4_t', 'vaddl', 'uaddl2',  'uint16x4_t', '[4, 5, 6, 7]']
-      - ['vaddl_high_u32', 'uint32x4_t', 'uint64x2_t', 'vaddl', 'uaddl2',  'uint32x2_t', '[2, 3]']
+      - ['vaddl_high_s8', 'int8x16_t', 'int16x8_t', 'vaddl', 'saddl2',  'int8x8_t']
+      - ['vaddl_high_s16', 'int16x8_t', 'int32x4_t', 'vaddl', 'saddl2',  'int16x4_t']
+      - ['vaddl_high_s32', 'int32x4_t', 'int64x2_t', 'vaddl', 'saddl2',  'int32x2_t']
+      - ['vaddl_high_u8', 'uint8x16_t', 'uint16x8_t', 'vaddl', 'uaddl2',  'uint8x8_t']
+      - ['vaddl_high_u16', 'uint16x8_t', 'uint32x4_t', 'vaddl', 'uaddl2',  'uint16x4_t']
+      - ['vaddl_high_u32', 'uint32x4_t', 'uint64x2_t', 'vaddl', 'uaddl2',  'uint32x2_t']
     compose:
       - Let:
           - a
           - '{neon_type[5]}'
-          - FnCall: ['simd_shuffle!', [a, a, '{type[6]}']]
+          - FnCall: ['vget_high_{neon_type[1]}', [a]]
       - Let:
           - b
           - '{neon_type[5]}'
-          - FnCall: ['simd_shuffle!', [b, b, '{type[6]}']]
+          - FnCall: ['vget_high_{neon_type[1]}', [b]]
       - Let: [a, '{neon_type[2]}', {FnCall: [simd_cast, [a]]}]
       - Let: [b, '{neon_type[2]}', {FnCall: [simd_cast, [b]]}]
       - FnCall: [simd_add, [a, b]]
@@ -14534,17 +14534,16 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - ['vaddw_high_s8', 'int16x8_t', 'int8x16_t', 'vaddw', 'saddw2', 'int8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddw_high_s16', 'int32x4_t', 'int16x8_t', 'vaddw', 'saddw2', 'int16x4_t', '[4, 5, 6, 7]']
-      - ['vaddw_high_s32', 'int64x2_t', 'int32x4_t', 'vaddw', 'saddw2', 'int32x2_t', '[2, 3]']
-      - ['vaddw_high_u8', 'uint16x8_t', 'uint8x16_t', 'vaddw', 'uaddw2', 'uint8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddw_high_u16', 'uint32x4_t', 'uint16x8_t', 'vaddw', 'uaddw2', 'uint16x4_t', '[4, 5, 6, 7]']
-      - ['vaddw_high_u32', 'uint64x2_t', 'uint32x4_t', 'vaddw', 'uaddw2', 'uint32x2_t', '[2, 3]']
+      - ['vaddw_high_s8', 'int16x8_t', 'int8x16_t', 'vaddw', 'saddw2', 'int8x8_t']
+      - ['vaddw_high_s16', 'int32x4_t', 'int16x8_t', 'vaddw', 'saddw2', 'int16x4_t']
+      - ['vaddw_high_s32', 'int64x2_t', 'int32x4_t', 'vaddw', 'saddw2', 'int32x2_t']
+      - ['vaddw_high_u8', 'uint16x8_t', 'uint8x16_t', 'vaddw', 'uaddw2', 'uint8x8_t']
+      - ['vaddw_high_u16', 'uint32x4_t', 'uint16x8_t', 'vaddw', 'uaddw2', 'uint16x4_t']
+      - ['vaddw_high_u32', 'uint64x2_t', 'uint32x4_t', 'vaddw', 'uaddw2', 'uint32x2_t']
     compose:
       - Let:
           - b
-          - '{neon_type[5]}'
-          - FnCall: ['simd_shuffle!', [b, b, '{type[6]}']]
+          - FnCall: ['vget_high_{neon_type[2]}', [b]]
       - Let:
           - b
           - '{neon_type[1]}'

From 1404a1e5b0fe197fe3ab04dafbbef8c2a1d281bc Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 05:03:26 +0530
Subject: [PATCH 31/78] Change implementation of `vqdmull{_high}{_lane}` and
 `vqrdmulh_lane`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 76 ++++++----------
 .../src/arm_shared/neon/generated.rs          | 90 +++++--------------
 .../spec/neon/aarch64.spec.yml                | 46 +++++-----
 .../spec/neon/arm_shared.spec.yml             | 24 ++---
 4 files changed, 83 insertions(+), 153 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index eb590b3af3e23..2c49925431cb4 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -14837,11 +14837,9 @@ pub fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-        vqdmull_s16(a, b)
-    }
+    let a = vget_high_s16(a);
+    let b = vdup_lane_s16::<N>(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s32)"]
@@ -14852,11 +14850,9 @@ pub fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-        vqdmull_s32(a, b)
-    }
+    let a = vget_high_s32(a);
+    let b = vdup_laneq_s32::<N>(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s32)"]
@@ -14867,11 +14863,9 @@ pub fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-        vqdmull_s32(a, b)
-    }
+    let a = vget_high_s32(a);
+    let b = vdup_lane_s32::<N>(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s16)"]
@@ -14882,11 +14876,9 @@ pub fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-        vqdmull_s16(a, b)
-    }
+    let a = vget_high_s16(a);
+    let b = vdup_laneq_s16::<N>(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s16)"]
@@ -14895,11 +14887,9 @@ pub fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = vdup_n_s16(b);
-        vqdmull_s16(a, b)
-    }
+    let a = vget_high_s16(a);
+    let b = vdup_n_s16(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s32)"]
@@ -14908,11 +14898,9 @@ pub fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = vdup_n_s32(b);
-        vqdmull_s32(a, b)
-    }
+    let a = vget_high_s32(a);
+    let b = vdup_n_s32(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s16)"]
@@ -14921,11 +14909,9 @@ pub fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        vqdmull_s16(a, b)
-    }
+    let a = vget_high_s16(a);
+    let b = vget_high_s16(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s32)"]
@@ -14934,11 +14920,9 @@ pub fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        vqdmull_s32(a, b)
-    }
+    let a = vget_high_s32(a);
+    let b = vget_high_s32(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Vector saturating doubling long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s16)"]
@@ -14949,10 +14933,8 @@ pub fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-        vqdmull_s16(a, b)
-    }
+    let b = vdup_laneq_s16::<N>(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Vector saturating doubling long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s32)"]
@@ -14963,10 +14945,8 @@ pub fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-        vqdmull_s32(a, b)
-    }
+    let b = vdup_laneq_s32::<N>(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_lane_s16)"]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 6d54e84f25089..3f5b7073e264a 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -35001,10 +35001,8 @@ pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 )]
 pub fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [N as u32; 4]);
-        vqdmull_s16(a, b)
-    }
+    let b = vdup_lane_s16::<N>(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Vector saturating doubling long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"]
@@ -35027,10 +35025,8 @@ pub fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
 )]
 pub fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [N as u32; 2]);
-        vqdmull_s32(a, b)
-    }
+    let b = vdup_lane_s32::<N>(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Vector saturating doubling long multiply with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"]
@@ -35588,11 +35584,8 @@ pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
 )]
 pub fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: int16x4_t =
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmulh_s16(a, b)
-    }
+    let b = vdup_lane_s16::<LANE>(b);
+    vqrdmulh_s16(a, b)
 }
 #[doc = "Vector rounding saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"]
@@ -35615,10 +35608,8 @@ pub fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4
 )]
 pub fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]);
-        vqrdmulh_s32(a, b)
-    }
+    let b = vdup_lane_s32::<LANE>(b);
+    vqrdmulh_s32(a, b)
 }
 #[doc = "Vector rounding saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"]
@@ -35641,11 +35632,8 @@ pub fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2
 )]
 pub fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let b: int16x4_t =
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmulh_s16(a, b)
-    }
+    let b = vdup_laneq_s16::<LANE>(b);
+    vqrdmulh_s16(a, b)
 }
 #[doc = "Vector rounding saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"]
@@ -35668,10 +35656,8 @@ pub fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x
 )]
 pub fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]);
-        vqrdmulh_s32(a, b)
-    }
+    let b = vdup_laneq_s32::<LANE>(b);
+    vqrdmulh_s32(a, b)
 }
 #[doc = "Vector rounding saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"]
@@ -35694,23 +35680,8 @@ pub fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x
 )]
 pub fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: int16x8_t = simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
-        vqrdmulhq_s16(a, b)
-    }
+    let b = vdupq_lane_s16::<LANE>(b);
+    vqrdmulhq_s16(a, b)
 }
 #[doc = "Vector rounding saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"]
@@ -35733,11 +35704,8 @@ pub fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x
 )]
 pub fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let b: int32x4_t =
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmulhq_s32(a, b)
-    }
+    let b = vdupq_lane_s32::<LANE>(b);
+    vqrdmulhq_s32(a, b)
 }
 #[doc = "Vector rounding saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"]
@@ -35760,23 +35728,8 @@ pub fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x
 )]
 pub fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let b: int16x8_t = simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
-        vqrdmulhq_s16(a, b)
-    }
+    let b = vdupq_laneq_s16::<LANE>(b);
+    vqrdmulhq_s16(a, b)
 }
 #[doc = "Vector rounding saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"]
@@ -35799,11 +35752,8 @@ pub fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16
 )]
 pub fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: int32x4_t =
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmulhq_s32(a, b)
-    }
+    let b = vdupq_laneq_s32::<LANE>(b);
+    vqrdmulhq_s32(a, b)
 }
 #[doc = "Vector saturating rounding doubling multiply high with scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index a6208fac7c350..11ba7c955dd6f 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -6963,11 +6963,11 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]']
-      - [int32x4_t, int64x2_t, int32x2_t, '[2, 3]']
+      - [int16x8_t, int32x4_t]
+      - [int32x4_t, int64x2_t]
     compose:
-      - Let: [a, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, '{type[3]}']]}]
-      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, '{type[3]}']]}]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
       - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
 
   - name: "vqdmull_high_n_{type[1]}"
@@ -6979,11 +6979,11 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [int16x8_t, "i16", int32x4_t, int16x4_t, '[4, 5, 6, 7]']
-      - [int32x4_t, "i32", int64x2_t, int32x2_t, '[2, 3]']
+      - [int16x8_t, "i16", int32x4_t]
+      - [int32x4_t, "i32", int64x2_t]
     compose:
-      - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}]
       - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
 
   - name: "vqdmull{type[3]}"
@@ -7038,7 +7038,7 @@ intrinsics:
       - Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ["vqdmulls_s32", [a, b]]
 
-  - name: "vqdmull{type[6]}"
+  - name: "vqdmull{type[3]}"
     doc: "Signed saturating doubling multiply long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
@@ -7049,12 +7049,12 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int16x8_t, int16x4_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]', '_high_lane_s16']
-      - [int32x4_t, int32x4_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]', '_high_laneq_s32']
+      - [int16x8_t, int16x4_t, int32x4_t, '_high_lane_s16']
+      - [int32x4_t, int32x4_t, int64x2_t, '_high_laneq_s32']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '2']]
-      - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
       - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
 
   - name: "vqdmull_high_lane_s32"
@@ -7068,11 +7068,11 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int32x4_t, int32x2_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]']
+      - [int32x4_t, int32x2_t, int64x2_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '1']]
-      - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
       - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
 
   - name: "vqdmull_high_laneq_s16"
@@ -7086,11 +7086,11 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int16x8_t, int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]']
+      - [int16x8_t, int16x8_t, int32x4_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '3']]
-      - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
       - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
 
   - name: "vqdmull_laneq_s16"
@@ -7104,10 +7104,10 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int16x4_t, int16x8_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]']
+      - [int16x4_t, int16x8_t, int32x4_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '3']]
-      - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
+      - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
       - FnCall: [vqdmull_s16, [a, b]]
 
   - name: "vqdmull_laneq_s32"
@@ -7121,10 +7121,10 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int32x2_t, int32x4_t, int64x2_t, '[N as u32, N as u32]']
+      - [int32x2_t, int32x4_t, int64x2_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '2']]
-      - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
+      - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
       - FnCall: [vqdmull_s32, [a, b]]
 
   - name: "vqdmlal{type[4]}"
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 4024e624ca9ba..e646d22369f80 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -7590,7 +7590,7 @@ intrinsics:
       - [int16x4_t, int16x4_t, int32x4_t, '[N as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '2']]
-      - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
+      - Let: [b, {FnCall: ['vdup{neon_type[0].lane_nox}', [b], [N]]}]
       - FnCall: [vqdmull_s16, [a, b]]
 
   - name: "vqdmull_lane_s32"
@@ -7607,10 +7607,10 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int32x2_t, int32x2_t, int64x2_t, '[N as u32; 2]']
+      - [int32x2_t, int32x2_t, int64x2_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '1']]
-      - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
+      - Let: [b, {FnCall: ['vdup{neon_type[0].lane_nox}', [b], [N]]}]
       - FnCall: [vqdmull_s32, [a, b]]
 
   - name: "vqdmlal{neon_type[1].noq}"
@@ -11230,17 +11230,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, '2']
+      - [_laneq_s16, int16x4_t, int16x8_t, '3']
+      - [q_lane_s16, int16x8_t, int16x4_t, '2']
+      - [q_laneq_s16, int16x8_t, int16x8_t, '3']
+      - [_lane_s32, int32x2_t, int32x2_t, '1']
+      - [_laneq_s32, int32x2_t, int32x4_t, '2']
+      - [q_lane_s32, int32x4_t, int32x2_t, '1']
+      - [q_laneq_s32, int32x4_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
-      - Let: [b, "{neon_type[1]}", {FnCall: [simd_shuffle!, [b, b, '{type[4]}']]}]
+      - Let: [b, {FnCall: ['vdup{type[0]}', [b], [LANE]]}]
       - FnCall: ["vqrdmulh{neon_type[1].no}", [a, b]]
 
   - name: "vqrdmulh{neon_type[0].N}"

From 077f63f91e4609c5bc1d1d87aa1ed7aaf0dd992e Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 05:11:39 +0530
Subject: [PATCH 32/78] Change implementation of `vqmov{u}n_high`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 36 +++++--------------
 .../spec/neon/aarch64.spec.yml                | 24 ++++++-------
 2 files changed, 21 insertions(+), 39 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 2c49925431cb4..60a9e99a3b99c 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -15030,13 +15030,7 @@ pub fn vqdmulls_s32(a: i32, b: i32) -> i64 {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqmovn_s16(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_s8(a, vqmovn_s16(b))
 }
 #[doc = "Signed saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s32)"]
@@ -15045,7 +15039,7 @@ pub fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    unsafe { simd_shuffle!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vqmovn_s32(b))
 }
 #[doc = "Signed saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s64)"]
@@ -15054,7 +15048,7 @@ pub fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    unsafe { simd_shuffle!(a, vqmovn_s64(b), [0, 1, 2, 3]) }
+    vcombine_s32(a, vqmovn_s64(b))
 }
 #[doc = "Signed saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u16)"]
@@ -15063,13 +15057,7 @@ pub fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqmovn_u16(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqmovn_u16(b))
 }
 #[doc = "Signed saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u32)"]
@@ -15078,7 +15066,7 @@ pub fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqmovn_u32(b))
 }
 #[doc = "Signed saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u64)"]
@@ -15087,7 +15075,7 @@ pub fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, vqmovn_u64(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqmovn_u64(b))
 }
 #[doc = "Saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_s64)"]
@@ -15164,13 +15152,7 @@ pub fn vqmovns_u32(a: u32) -> u16 {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqmovun_s16(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqmovun_s16(b))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s32)"]
@@ -15179,7 +15161,7 @@ pub fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqmovun_s32(b))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s64)"]
@@ -15188,7 +15170,7 @@ pub fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, vqmovun_s64(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqmovun_s64(b))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovunh_s16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 11ba7c955dd6f..cb20ff24d20bf 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -7416,11 +7416,11 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int8x16_t]
+      - [int16x4_t, int32x4_t, int16x8_t]
+      - [int32x2_t, int64x2_t, int32x4_t]
     compose:
-      - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}]]
 
   - name: "vqmovn_high{neon_type[1].noq}"
     doc: "Signed saturating extract narrow"
@@ -7431,11 +7431,11 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [uint8x8_t, uint16x8_t, uint8x16_t]
+      - [uint16x4_t, uint32x4_t, uint16x8_t]
+      - [uint32x2_t, uint64x2_t, uint32x4_t]
     compose:
-      - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}]]
 
   - name: "vqmovn{type[2]}"
     doc: "Saturating extract narrow"
@@ -7523,11 +7523,11 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [uint8x8_t, int16x8_t, uint8x16_t, s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, int32x4_t, uint16x8_t, s32, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, int64x2_t, uint32x4_t, s64, '[0, 1, 2, 3]']
+      - [uint8x8_t, int16x8_t, uint8x16_t, s16]
+      - [uint16x4_t, int32x4_t, uint16x8_t, s32]
+      - [uint32x2_t, int64x2_t, uint32x4_t, s64]
     compose:
-      - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovun_{type[3]}", [b]]}, "{type[4]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovun_{type[3]}", [b]]}]]
 
   - name: "vqrdmulh{type[1]}"
     doc: "Signed saturating rounding doubling multiply returning high half"

From 933aa5c3b5b83ae2b3daec726aa56128fc1bf152 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 05:15:21 +0530
Subject: [PATCH 33/78] Change implementation of `vqrdml{a,s}h_lane`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 96 +++++++------------
 .../spec/neon/aarch64.spec.yml                | 36 +++----
 2 files changed, 50 insertions(+), 82 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 60a9e99a3b99c..112c84036fbb7 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -15276,10 +15276,8 @@ pub fn vqnegd_s64(a: i64) -> i64 {
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
-        vqrdmlah_s16(a, b, c)
-    }
+    let c = vdup_lane_s16::<LANE>(c);
+    vqrdmlah_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s32)"]
@@ -15290,10 +15288,8 @@ pub fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
-        vqrdmlah_s32(a, b, c)
-    }
+    let c = vdup_lane_s32::<LANE>(c);
+    vqrdmlah_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s16)"]
@@ -15304,10 +15300,8 @@ pub fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
-        vqrdmlah_s16(a, b, c)
-    }
+    let c = vdup_laneq_s16::<LANE>(c);
+    vqrdmlah_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s32)"]
@@ -15318,10 +15312,8 @@ pub fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
-        vqrdmlah_s32(a, b, c)
-    }
+    let c = vdup_laneq_s32::<LANE>(c);
+    vqrdmlah_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s16)"]
@@ -15332,10 +15324,8 @@ pub fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
-        vqrdmlahq_s16(a, b, c)
-    }
+    let c = vdupq_lane_s16::<LANE>(c);
+    vqrdmlahq_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s32)"]
@@ -15346,10 +15336,8 @@ pub fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
-        vqrdmlahq_s32(a, b, c)
-    }
+    let c = vdupq_lane_s32::<LANE>(c);
+    vqrdmlahq_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s16)"]
@@ -15360,10 +15348,8 @@ pub fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
-        vqrdmlahq_s16(a, b, c)
-    }
+    let c = vdupq_laneq_s16::<LANE>(c);
+    vqrdmlahq_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s32)"]
@@ -15374,10 +15360,8 @@ pub fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
-        vqrdmlahq_s32(a, b, c)
-    }
+    let c = vdupq_laneq_s32::<LANE>(c);
+    vqrdmlahq_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s16)"]
@@ -15520,10 +15504,8 @@ pub fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
-        vqrdmlsh_s16(a, b, c)
-    }
+    let c = vdup_lane_s16::<LANE>(c);
+    vqrdmlsh_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s32)"]
@@ -15534,10 +15516,8 @@ pub fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
-        vqrdmlsh_s32(a, b, c)
-    }
+    let c = vdup_lane_s32::<LANE>(c);
+    vqrdmlsh_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s16)"]
@@ -15548,10 +15528,8 @@ pub fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
-        vqrdmlsh_s16(a, b, c)
-    }
+    let c = vdup_laneq_s16::<LANE>(c);
+    vqrdmlsh_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s32)"]
@@ -15562,10 +15540,8 @@ pub fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
-        vqrdmlsh_s32(a, b, c)
-    }
+    let c = vdup_laneq_s32::<LANE>(c);
+    vqrdmlsh_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s16)"]
@@ -15576,10 +15552,8 @@ pub fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
-        vqrdmlshq_s16(a, b, c)
-    }
+    let c = vdupq_lane_s16::<LANE>(c);
+    vqrdmlshq_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s32)"]
@@ -15590,10 +15564,8 @@ pub fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
-        vqrdmlshq_s32(a, b, c)
-    }
+    let c = vdupq_lane_s32::<LANE>(c);
+    vqrdmlshq_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s16)"]
@@ -15604,10 +15576,8 @@ pub fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
-        vqrdmlshq_s16(a, b, c)
-    }
+    let c = vdupq_laneq_s16::<LANE>(c);
+    vqrdmlshq_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s32)"]
@@ -15618,10 +15588,8 @@ pub fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
-        vqrdmlshq_s32(a, b, c)
-    }
+    let c = vdupq_laneq_s32::<LANE>(c);
+    vqrdmlshq_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index cb20ff24d20bf..a805fbd2058a6 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -7613,17 +7613,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]']
-      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]']
-      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
-      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]']
-      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]']
-      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
+      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2']
+      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3']
+      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2']
+      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3']
+      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1']
+      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2']
+      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1']
+      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
-      - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}]
+      - Let: [c, {FnCall: ['vdup{type[0]}', [c], [LANE]]}]
       - FnCall: ["vqrdmlah{neon_type[2].no}", [a, b, c]]
 
   - name: "vqrdmlah{type[4]}"
@@ -7697,17 +7697,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]']
-      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]']
-      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
-      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]']
-      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]']
-      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
+      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2']
+      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3']
+      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2']
+      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3']
+      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1']
+      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2']
+      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1']
+      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
-      - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}]
+      - Let: [c, {FnCall: ['vdup{type[0]}', [c], [LANE]]}]
       - FnCall: ["vqrdmlsh{neon_type[2].no}", [a, b, c]]
 
   - name: "vqrdmlsh{type[3]}"

From d4bb7f4fdd9d4db982cd1a1d79dd357300433d4b Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 05:49:07 +0530
Subject: [PATCH 34/78] Change implementation of `v{q}{r}shr{u}n_high_n`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 156 +++++-------------
 .../spec/neon/aarch64.spec.yml                | 111 ++++++-------
 2 files changed, 94 insertions(+), 173 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 112c84036fbb7..1cbac3e284b55 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -15902,13 +15902,7 @@ pub fn vqrshld_u64(a: u64, b: i64) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqrshrn_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_s8(a, vqrshrn_n_s16::<N>(b))
 }
 #[doc = "Signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s32)"]
@@ -15919,7 +15913,7 @@ pub fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vqrshrn_n_s32::<N>(b))
 }
 #[doc = "Signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s64)"]
@@ -15930,7 +15924,7 @@ pub fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_s32(a, vqrshrn_n_s64::<N>(b))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u16)"]
@@ -15941,13 +15935,7 @@ pub fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqrshrn_n_u16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqrshrn_n_u16::<N>(b))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u32)"]
@@ -15958,7 +15946,7 @@ pub fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqrshrn_n_u32::<N>(b))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u64)"]
@@ -15969,7 +15957,7 @@ pub fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqrshrn_n_u64::<N>(b))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_u64)"]
@@ -16052,13 +16040,7 @@ pub fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqrshrun_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqrshrun_n_s16::<N>(b))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s32)"]
@@ -16069,7 +16051,7 @@ pub fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqrshrun_n_s32::<N>(b))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s64)"]
@@ -16080,7 +16062,7 @@ pub fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqrshrun_n_s64::<N>(b))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrund_n_s64)"]
@@ -16351,13 +16333,7 @@ pub fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqshrn_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_s8(a, vqshrn_n_s16::<N>(b))
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s32)"]
@@ -16368,7 +16344,7 @@ pub fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vqshrn_n_s32::<N>(b))
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s64)"]
@@ -16379,7 +16355,7 @@ pub fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_s32(a, vqshrn_n_s64::<N>(b))
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u16)"]
@@ -16390,13 +16366,7 @@ pub fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqshrn_n_u16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqshrn_n_u16::<N>(b))
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u32)"]
@@ -16407,7 +16377,7 @@ pub fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqshrn_n_u32::<N>(b))
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u64)"]
@@ -16418,7 +16388,7 @@ pub fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqshrn_n_u64::<N>(b))
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_s64)"]
@@ -16509,13 +16479,7 @@ pub fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqshrun_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqshrun_n_s16::<N>(b))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s32)"]
@@ -16526,7 +16490,7 @@ pub fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqshrun_n_s32::<N>(b))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s64)"]
@@ -16537,7 +16501,7 @@ pub fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqshrun_n_s64::<N>(b))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrund_n_s64)"]
@@ -20128,13 +20092,7 @@ pub fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vrshrn_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_s8(a, vrshrn_n_s16::<N>(b))
 }
 #[doc = "Rounding shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"]
@@ -20145,7 +20103,7 @@ pub fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vrshrn_n_s32::<N>(b))
 }
 #[doc = "Rounding shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"]
@@ -20156,7 +20114,7 @@ pub fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_s32(a, vrshrn_n_s64::<N>(b))
 }
 #[doc = "Rounding shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"]
@@ -20167,13 +20125,7 @@ pub fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vrshrn_n_u16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vrshrn_n_u16::<N>(b))
 }
 #[doc = "Rounding shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"]
@@ -20184,7 +20136,7 @@ pub fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vrshrn_n_u32::<N>(b))
 }
 #[doc = "Rounding shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"]
@@ -20195,7 +20147,7 @@ pub fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vrshrn_n_u64::<N>(b))
 }
 #[doc = "Reciprocal square-root estimate."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"]
@@ -20708,10 +20660,8 @@ pub fn vshld_u64(a: u64, b: i64) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
     static_assert!(N >= 0 && N <= 8);
-    unsafe {
-        let b: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vshll_n_s8::<N>(b)
-    }
+    let b = vget_high_s8(a);
+    vshll_n_s8::<N>(b)
 }
 #[doc = "Signed shift left long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"]
@@ -20722,10 +20672,8 @@ pub fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
     static_assert!(N >= 0 && N <= 16);
-    unsafe {
-        let b: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        vshll_n_s16::<N>(b)
-    }
+    let b = vget_high_s16(a);
+    vshll_n_s16::<N>(b)
 }
 #[doc = "Signed shift left long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"]
@@ -20736,10 +20684,8 @@ pub fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
     static_assert!(N >= 0 && N <= 32);
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        vshll_n_s32::<N>(b)
-    }
+    let b = vget_high_s32(a);
+    vshll_n_s32::<N>(b)
 }
 #[doc = "Signed shift left long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"]
@@ -20750,10 +20696,8 @@ pub fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
     static_assert!(N >= 0 && N <= 8);
-    unsafe {
-        let b: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vshll_n_u8::<N>(b)
-    }
+    let b: uint8x8_t = vget_high_u8(a);
+    vshll_n_u8::<N>(b)
 }
 #[doc = "Signed shift left long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"]
@@ -20764,10 +20708,8 @@ pub fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
     static_assert!(N >= 0 && N <= 16);
-    unsafe {
-        let b: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        vshll_n_u16::<N>(b)
-    }
+    let b: uint16x4_t = vget_high_u16(a);
+    vshll_n_u16::<N>(b)
 }
 #[doc = "Signed shift left long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"]
@@ -20778,10 +20720,8 @@ pub fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
     static_assert!(N >= 0 && N <= 32);
-    unsafe {
-        let b: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        vshll_n_u32::<N>(b)
-    }
+    let b: uint32x2_t = vget_high_u32(a);
+    vshll_n_u32::<N>(b)
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"]
@@ -20792,13 +20732,7 @@ pub fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vshrn_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_s8(a, vshrn_n_s16::<N>(b))
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"]
@@ -20809,7 +20743,7 @@ pub fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vshrn_n_s32::<N>(b))
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"]
@@ -20820,7 +20754,7 @@ pub fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vshrn_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_s32(a, vshrn_n_s64::<N>(b))
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"]
@@ -20831,13 +20765,7 @@ pub fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vshrn_n_u16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vshrn_n_u16::<N>(b))
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"]
@@ -20848,7 +20776,7 @@ pub fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vshrn_n_u32::<N>(b))
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"]
@@ -20859,7 +20787,7 @@ pub fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vshrn_n_u64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vshrn_n_u64::<N>(b))
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index a805fbd2058a6..f6705033f9230 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -7828,12 +7828,12 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', 'N >= 1 && N <= 8']
-      - [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', '[0, 1, 2, 3, 4, 5, 6, 7]', 'N >= 1 && N <= 16']
-      - [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', '[0, 1, 2, 3]', 'N >= 1 && N <= 32']
+      - [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', 'N >= 1 && N <= 8']
+      - [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', 'N >= 1 && N <= 16']
+      - [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', 'N >= 1 && N <= 32']
     compose:
-      - FnCall: [static_assert!, ["{type[5]}"]]
-      - FnCall: [simd_shuffle!, [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::<N>", [b]]}, "{type[4]}"]]
+      - FnCall: [static_assert!, ["{type[4]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::<N>", [b]]}]]
 
   - name: "vqrshrn{type[0]}"
     doc: "Unsigned saturating rounded shift right narrow"
@@ -7865,18 +7865,17 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ['{type[3]}']]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
             - FnCall:
                 - "vqrshrn_n{neon_type[1].noq}::<N>"
                 - - b
-            - "{type[4]}"
 
   - name: "vqrshrun{type[0]}"
     doc: "Signed saturating rounded shift right unsigned narrow"
@@ -7916,18 +7915,17 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', s32, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', s64, '[0, 1, 2, 3]']
+      - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
             - FnCall:
-                - "vqrshrun_n_{type[4]}::<N>"
+                - "vqrshrun_n_{neon_type[1]}::<N>"
                 - - b
-            - "{type[5]}"
 
   - name: "vqshld_{type}"
     doc: "Signed saturating shift left"
@@ -8110,16 +8108,15 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', s16]
-      - [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]', s32]
-      - [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]', s64]
+      - [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8']
+      - [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16']
+      - [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[4]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[1]}'
           - - a
-            - FnCall: ["vqshrn_n_{type[6]}::<N>", [b]]
-            - "{type[5]}"
+            - FnCall: ["vqshrn_n_{neon_type[2]}::<N>", [b]]
 
   - name: "vqshrnd_n_u64"
     doc: "Unsigned saturating shift right narrow"
@@ -8178,16 +8175,15 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[4]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[1]}'
           - - a
             - FnCall: ["vqshrn_n_{neon_type[2]}::<N>", [b]]
-            - "{type[5]}"
 
   - name: "vqshrun{type[0]}"
     doc: "Signed saturating shift right unsigned narrow"
@@ -8223,16 +8219,15 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
             - FnCall: ["vqshrun_n_{neon_type[1]}::<N>", [b]]
-            - "{type[4]}"
 
   - name: "vsqadd{type[0]}"
     doc: "Unsigned saturating accumulate of signed value"
@@ -8699,19 +8694,18 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8']
+      - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16']
+      - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32']
+      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
             - FnCall: ["vrshrn_n_{neon_type[1]}::<N>", [b]]
-            - "{type[4]}"
 
   - name: "vrsubhn_high_{neon_type[1]}"
     doc: "Rounding subtract returning high narrow"
@@ -9033,13 +9027,13 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]']
-      - [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32', '[2, 3]']
+      - [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8']
+      - [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16']
+      - [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
-      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - FnCall: ["vshll_n_{neon_type[2]}::<N>", [b]]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - FnCall: ["vshll_n_{neon_type[2]}", [b], [N]]
 
   - name: "vshll_high_n_{neon_type[0]}"
     doc: "Signed shift left long"
@@ -9052,13 +9046,13 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]']
-      - [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32', '[2, 3]']
+      - [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8']
+      - [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16']
+      - [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
-      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - FnCall: ["vshll_n_{neon_type[2]}::<N>", [b]]
+      - Let: [b, "{neon_type[2]}", {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - FnCall: ["vshll_n_{neon_type[2]}", [b], [N]]
 
   - name: "vshrn_high_n_{neon_type[1]}"
     doc: "Shift right narrow"
@@ -9071,19 +9065,18 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8']
+      - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16']
+      - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32']
+      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
-            - FnCall: ["vshrn_n_{neon_type[1]}::<N>", [b]]
-            - "{type[4]}"
+            - FnCall: ["vshrn_n_{neon_type[1]}", [b], [N]]
 
   - name: "vsm3partw1{neon_type.no}"
     doc: "SM3PARTW1"

From 3dd1690288e107a84690075f201e3463b51c12bc Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 05:53:35 +0530
Subject: [PATCH 35/78] Change implementation of `vr{add,sub}hn_high`

---
 .../core_arch/src/aarch64/neon/generated.rs   | 36 +++++---------
 .../src/arm_shared/neon/generated.rs          | 48 +++++++++----------
 .../spec/neon/aarch64.spec.yml                | 36 ++++++--------
 .../spec/neon/arm_shared.spec.yml             | 45 +++++++++--------
 4 files changed, 72 insertions(+), 93 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 1cbac3e284b55..d09171a753526 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -20343,8 +20343,7 @@ pub fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let x: int8x8_t = vrsubhn_s16(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    vcombine_s8(a, vrsubhn_s16(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
@@ -20354,8 +20353,7 @@ pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let x: int16x4_t = vrsubhn_s32(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vrsubhn_s32(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
@@ -20365,8 +20363,7 @@ pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let x: int32x2_t = vrsubhn_s64(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) }
+    vcombine_s32(a, vrsubhn_s64(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
@@ -20376,8 +20373,7 @@ pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    let x: uint8x8_t = vrsubhn_u16(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    vcombine_u8(a, vrsubhn_u16(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
@@ -20387,8 +20383,7 @@ pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    let x: uint16x4_t = vrsubhn_u32(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vrsubhn_u32(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
@@ -20398,8 +20393,7 @@ pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    let x: uint32x2_t = vrsubhn_u64(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) }
+    vcombine_u32(a, vrsubhn_u64(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
@@ -20409,8 +20403,7 @@ pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let x: int8x8_t = vrsubhn_s16(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    vcombine_s8(a, vrsubhn_s16(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
@@ -20420,8 +20413,7 @@ pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let x: int16x4_t = vrsubhn_s32(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vrsubhn_s32(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
@@ -20431,8 +20423,7 @@ pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let x: int32x2_t = vrsubhn_s64(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) }
+    vcombine_s32(a, vrsubhn_s64(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
@@ -20442,8 +20433,7 @@ pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    let x: uint8x8_t = vrsubhn_u16(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    vcombine_u8(a, vrsubhn_u16(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
@@ -20453,8 +20443,7 @@ pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    let x: uint16x4_t = vrsubhn_u32(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vrsubhn_u32(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
@@ -20464,8 +20453,7 @@ pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    let x: uint32x2_t = vrsubhn_u64(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) }
+    vcombine_u32(a, vrsubhn_u64(b, c))
 }
 #[doc = "Multi-vector floating-point adjust exponent"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f16)"]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 3f5b7073e264a..6c0dd2af94e04 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -2207,7 +2207,7 @@ pub fn vaddh_f16(a: f16, b: f16) -> f16 {
 pub fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), int16x8_t::splat(8)));
-        simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+        vcombine_s8(r, x)
     }
 }
 #[doc = "Add returning High Narrow (high half)."]
@@ -2235,7 +2235,7 @@ pub fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t {
 pub fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), int32x4_t::splat(16)));
-        simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
+        vcombine_s16(r, x)
     }
 }
 #[doc = "Add returning High Narrow (high half)."]
@@ -2263,7 +2263,7 @@ pub fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t {
 pub fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), int64x2_t::splat(32)));
-        simd_shuffle!(r, x, [0, 1, 2, 3])
+        vcombine_s32(r, x)
     }
 }
 #[doc = "Add returning High Narrow (high half)."]
@@ -2291,7 +2291,7 @@ pub fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t {
 pub fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), uint16x8_t::splat(8)));
-        simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+        vcombine_u8(r, x)
     }
 }
 #[doc = "Add returning High Narrow (high half)."]
@@ -2319,7 +2319,7 @@ pub fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t
 pub fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), uint32x4_t::splat(16)));
-        simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
+        vcombine_u16(r, x)
     }
 }
 #[doc = "Add returning High Narrow (high half)."]
@@ -2347,7 +2347,7 @@ pub fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_
 pub fn vaddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), uint64x2_t::splat(32)));
-        simd_shuffle!(r, x, [0, 1, 2, 3])
+        vcombine_u32(r, x)
     }
 }
 #[doc = "Add returning High Narrow."]
@@ -38521,7 +38521,7 @@ pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 )]
 pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
     let x = vraddhn_s16(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    vcombine_s8(a, x)
 }
 #[doc = "Rounding Add returning High Narrow (high half)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"]
@@ -38547,7 +38547,7 @@ pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
 )]
 pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
     let x = vraddhn_s32(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, x)
 }
 #[doc = "Rounding Add returning High Narrow (high half)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"]
@@ -38573,7 +38573,7 @@ pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
 )]
 pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
     let x = vraddhn_s64(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) }
+    vcombine_s32(a, x)
 }
 #[doc = "Rounding Add returning High Narrow (high half)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"]
@@ -38600,7 +38600,7 @@ pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
 pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
     unsafe {
         let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c)));
-        simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+        vcombine_u8(a, x)
     }
 }
 #[doc = "Rounding Add returning High Narrow (high half)."]
@@ -38628,7 +38628,7 @@ pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_
 pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
     unsafe {
         let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c)));
-        simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
+        vcombine_u16(a, x)
     }
 }
 #[doc = "Rounding Add returning High Narrow (high half)."]
@@ -38656,7 +38656,7 @@ pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8
 pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
     unsafe {
         let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c)));
-        simd_shuffle!(a, x, [0, 1, 2, 3])
+        vcombine_u32(a, x)
     }
 }
 #[doc = "Rounding Add returning High Narrow."]
@@ -69225,8 +69225,8 @@ pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let d: int8x8_t = vsubhn_s16(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    let d = vsubhn_s16(b, c);
+    vcombine_s8(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32)"]
@@ -69251,8 +69251,8 @@ pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let d: int16x4_t = vsubhn_s32(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    let d = vsubhn_s32(b, c);
+    vcombine_s16(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64)"]
@@ -69277,8 +69277,8 @@ pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let d: int32x2_t = vsubhn_s64(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3]) }
+    let d = vsubhn_s64(b, c);
+    vcombine_s32(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16)"]
@@ -69303,8 +69303,8 @@ pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    let d: uint8x8_t = vsubhn_u16(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    let d = vsubhn_u16(b, c);
+    vcombine_u8(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32)"]
@@ -69329,8 +69329,8 @@ pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    let d: uint16x4_t = vsubhn_u32(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    let d = vsubhn_u32(b, c);
+    vcombine_u16(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64)"]
@@ -69355,8 +69355,8 @@ pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    let d: uint32x2_t = vsubhn_u64(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3]) }
+    let d = vsubhn_u64(b, c);
+    vcombine_u32(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index f6705033f9230..912eff7ce4cc5 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -8717,18 +8717,14 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int16x8_t, int8x16_t]
+      - [int16x4_t, int32x4_t, int32x4_t, int16x8_t]
+      - [int32x2_t, int64x2_t, int64x2_t, int32x4_t]
+      - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t]
+      - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t]
+      - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t]
     compose:
-      - Let:
-          - x
-          - "{neon_type[0]}"
-          - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]
-      - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]}]]
 
   - name: "vrsubhn_high_{neon_type[1]}"
     doc: "Rounding subtract returning high narrow"
@@ -8740,18 +8736,14 @@ intrinsics:
       - *neon-stable
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int16x8_t, int8x16_t]
+      - [int16x4_t, int32x4_t, int32x4_t, int16x8_t]
+      - [int32x2_t, int64x2_t, int64x2_t, int32x4_t]
+      - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t]
+      - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t]
+      - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t]
     compose:
-      - Let:
-          - x
-          - "{neon_type[0]}"
-          - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]
-      - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]}]]
 
   - name: "vcopy{neon_type[0].lane_nox}"
     doc: "Insert vector element from another vector element"
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index e646d22369f80..f22f503c564cc 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -6805,18 +6805,17 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int8x16_t]
+      - [int16x4_t, int32x4_t, int16x8_t]
+      - [int32x2_t, int64x2_t, int32x4_t]
+      - [uint8x8_t, uint16x8_t, uint8x16_t]
+      - [uint16x4_t, uint32x4_t, uint16x8_t]
+      - [uint32x2_t, uint64x2_t, uint32x4_t]
     compose:
       - Let:
           - d
-          - "{neon_type[0]}"
           - FnCall: ["vsubhn{neon_type[1].noq}", [b, c]]
-      - FnCall: [simd_shuffle!, [a, d, "{type[3]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, d]]
 
   - name: "vhsub{neon_type[1].no}"
     doc: "Signed halving subtract"
@@ -13225,9 +13224,9 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [uint8x8_t , uint16x8_t, uint8x16_t, 'vraddhn.i16', int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, 'vraddhn.i32', int32x4_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, 'vraddhn.i64', int64x2_t, '[0, 1, 2, 3]']
+      - [uint8x8_t , uint16x8_t, uint8x16_t, 'vraddhn.i16', int16x8_t]
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'vraddhn.i32', int32x4_t]
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'vraddhn.i64', int64x2_t]
     compose:
       - Let:
           - x
@@ -13238,7 +13237,7 @@ intrinsics:
                     - "vraddhn{neon_type[4].noq}"
                     - - FnCall: [transmute, [b]]
                       - FnCall: [transmute, [c]]
-      - FnCall: ["simd_shuffle!", [a, x, '{type[5]}']]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, x]]
 
   - name: "vraddhn_high{neon_type[1].noq}"
     doc: "Rounding Add returning High Narrow (high half)."
@@ -13252,9 +13251,9 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [int8x8_t , int16x8_t, int8x16_t, 'vraddhn.i16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, 'vraddhn.i32', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, 'vraddhn.i64', '[0, 1, 2, 3]']
+      - [int8x8_t , int16x8_t, int8x16_t, 'vraddhn.i16']
+      - [int16x4_t, int32x4_t, int16x8_t, 'vraddhn.i32']
+      - [int32x2_t, int64x2_t, int32x4_t, 'vraddhn.i64']
     compose:
       - Let:
           - x
@@ -13262,7 +13261,7 @@ intrinsics:
               - "vraddhn{neon_type[1].noq}"
               - - b
                 - c
-      - FnCall: ["simd_shuffle!", [a, x, '{type[4]}']]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, x]]
 
   - name: "vpadd{neon_type.no}"
     doc: "Add pairwise."
@@ -14591,12 +14590,12 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - ['vaddhn_high_s16', 'int8x8_t', 'int16x8_t', 'int8x16_t', 'int16x8_t::splat(8)', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddhn_high_s32', 'int16x4_t', 'int32x4_t', 'int16x8_t', 'int32x4_t::splat(16)', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - ['vaddhn_high_s64', 'int32x2_t', 'int64x2_t', 'int32x4_t', 'int64x2_t::splat(32)', '[0, 1, 2, 3]']
-      - ['vaddhn_high_u16', 'uint8x8_t', 'uint16x8_t', 'uint8x16_t', 'uint16x8_t::splat(8)', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddhn_high_u32', 'uint16x4_t', 'uint32x4_t', 'uint16x8_t', 'uint32x4_t::splat(16)', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - ['vaddhn_high_u64', 'uint32x2_t', 'uint64x2_t', 'uint32x4_t', 'uint64x2_t::splat(32)', '[0, 1, 2, 3]']
+      - ['vaddhn_high_s16', 'int8x8_t', 'int16x8_t', 'int8x16_t', 'int16x8_t::splat(8)']
+      - ['vaddhn_high_s32', 'int16x4_t', 'int32x4_t', 'int16x8_t', 'int32x4_t::splat(16)']
+      - ['vaddhn_high_s64', 'int32x2_t', 'int64x2_t', 'int32x4_t', 'int64x2_t::splat(32)']
+      - ['vaddhn_high_u16', 'uint8x8_t', 'uint16x8_t', 'uint8x16_t', 'uint16x8_t::splat(8)']
+      - ['vaddhn_high_u32', 'uint16x4_t', 'uint32x4_t', 'uint16x8_t', 'uint32x4_t::splat(16)']
+      - ['vaddhn_high_u64', 'uint32x2_t', 'uint64x2_t', 'uint32x4_t', 'uint64x2_t::splat(32)']
     compose:
       - Let:
         - x
@@ -14609,7 +14608,7 @@ intrinsics:
                   - - a
                     - b
                 - '{type[4]}'
-      - FnCall: ['simd_shuffle!', [r, x, '{type[5]}']]
+      - FnCall: ['vcombine_{neon_type[1]}', [r, x]]
 
   - name: "{type[0]}"
     doc: "Vector narrow integer."

From cee8ece450dcfb1bbea826df7dcd7097bb8a0661 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 06:10:10 +0530
Subject: [PATCH 36/78] Change implementation of `vmul_lane`

---
 .../core_arch/src/aarch64/neon/generated.rs   |   8 +-
 .../src/arm_shared/neon/generated.rs          | 189 ++----------------
 .../spec/neon/aarch64.spec.yml                |  10 +-
 .../spec/neon/arm_shared.spec.yml             |  52 ++---
 4 files changed, 57 insertions(+), 202 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index d09171a753526..63ffa6765cec6 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -12591,7 +12591,7 @@ pub fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmul_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
+    unsafe { simd_mul(a, vdup_laneq_f16::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f16)"]
@@ -12603,7 +12603,7 @@ pub fn vmul_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float1
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
+    unsafe { simd_mul(a, vdupq_laneq_f16::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"]
@@ -12917,7 +12917,7 @@ pub fn vmull_p64(a: p64, b: p64) -> p128 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
     static_assert!(LANE == 0);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
+    unsafe { simd_mul(a, vdupq_lane_f64::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f64)"]
@@ -12928,7 +12928,7 @@ pub fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float6
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
+    unsafe { simd_mul(a, vdupq_laneq_f64::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_lane_f32)"]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 6c0dd2af94e04..2e3a24ee957ef 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -29763,12 +29763,7 @@ pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmul_lane_f16<const LANE: i32>(a: float16x4_t, v: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(v, v, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdup_lane_f16::<LANE>(v)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"]
@@ -29792,25 +29787,7 @@ pub fn vmul_lane_f16<const LANE: i32>(a: float16x4_t, v: float16x4_t) -> float16
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                v,
-                v,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_lane_f16::<LANE>(v)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"]
@@ -29833,7 +29810,7 @@ pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float1
 )]
 pub fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { simd_mul(a, vdup_lane_f32::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"]
@@ -29856,7 +29833,7 @@ pub fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32
 )]
 pub fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { simd_mul(a, vdup_laneq_f32::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"]
@@ -29879,12 +29856,7 @@ pub fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float3
 )]
 pub fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_lane_f32::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"]
@@ -29907,12 +29879,7 @@ pub fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float3
 )]
 pub fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_laneq_f32::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"]
@@ -29935,12 +29902,7 @@ pub fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float
 )]
 pub fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdup_lane_s16::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"]
@@ -29963,25 +29925,7 @@ pub fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 )]
 pub fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_lane_s16::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"]
@@ -30004,7 +29948,7 @@ pub fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t
 )]
 pub fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { simd_mul(a, vdup_lane_s32::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"]
@@ -30027,12 +29971,7 @@ pub fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 )]
 pub fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_lane_s32::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"]
@@ -30055,12 +29994,7 @@ pub fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t
 )]
 pub fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdup_lane_u16::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"]
@@ -30083,25 +30017,7 @@ pub fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_
 )]
 pub fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_lane_u16::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"]
@@ -30124,7 +30040,7 @@ pub fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8
 )]
 pub fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { simd_mul(a, vdup_lane_u32::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"]
@@ -30147,12 +30063,7 @@ pub fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_
 )]
 pub fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_lane_u32::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"]
@@ -30175,12 +30086,7 @@ pub fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4
 )]
 pub fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdup_laneq_s16::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"]
@@ -30203,25 +30109,7 @@ pub fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t
 )]
 pub fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_laneq_s16::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"]
@@ -30244,7 +30132,7 @@ pub fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t
 )]
 pub fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { simd_mul(a, vdup_laneq_s32::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"]
@@ -30267,12 +30155,7 @@ pub fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t
 )]
 pub fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_laneq_s32::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"]
@@ -30295,12 +30178,7 @@ pub fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t
 )]
 pub fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdup_laneq_u16::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"]
@@ -30323,25 +30201,7 @@ pub fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4
 )]
 pub fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_laneq_u16::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"]
@@ -30364,7 +30224,7 @@ pub fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x
 )]
 pub fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { simd_mul(a, vdup_laneq_u32::<LANE>(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"]
@@ -30387,12 +30247,7 @@ pub fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2
 )]
 pub fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_laneq_u32::<LANE>(b)) }
 }
 #[doc = "Vector multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 912eff7ce4cc5..7664bf3c10fa4 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -11006,7 +11006,7 @@ intrinsics:
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: ["simd_shuffle!", [b, b, '[LANE as u32; 2]']]
+            - FnCall: [vdupq_lane_f64, [b], [LANE]]
 
   - name: "vmuld_lane_f64"
     doc: "Floating-point multiply"
@@ -11063,7 +11063,7 @@ intrinsics:
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: [simd_shuffle!, [b, b, '[LANE as u32; 2]']]
+            - FnCall: [vdupq_laneq_f64, [b], [LANE]]
 
 
   # vmulq_laneq_f16
@@ -11080,14 +11080,14 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '_lane', "[LANE as u32; 4]"]
-      - [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32; 8]"]
+      - [float16x4_t, float16x8_t, '_lane']
+      - [float16x8_t, float16x8_t, 'q_lane']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '3']]
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
+            - FnCall: ['vdup{neon_type[0].laneq_nox}', [b], [LANE]]
 
 
   - name: "vmul{type[1]}_{type[0]}"
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index f22f503c564cc..d20c94a2f8cbf 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -6300,20 +6300,20 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int16x4_t, int16x4_t, '2']
+      - [int16x8_t, int16x4_t, '2']
+      - [int32x2_t, int32x2_t, '1']
+      - [int32x4_t, int32x2_t, '1']
+      - [uint16x4_t, uint16x4_t, '2']
+      - [uint16x8_t, uint16x4_t, '2']
+      - [uint32x2_t, uint32x2_t, '1']
+      - [uint32x4_t, uint32x2_t, '1']
     compose:
       - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]]
+            - FnCall: ["vdup{neon_type[0].lane_nox}", [b], [LANE]]
 
 
   - name: "vmul{neon_type[0].lane_nox}"
@@ -6332,14 +6332,14 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [float16x8_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [float16x4_t, float16x4_t, '2']
+      - [float16x8_t, float16x4_t, '2']
     compose:
       - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: ["simd_shuffle!", [v, v, "{type[3]}"]]
+            - FnCall: ["vdup{neon_type[0].lane_nox}", [v], [LANE]]
 
 
   - name: "vmul{neon_type[0].laneq_nox}"
@@ -6356,20 +6356,20 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int16x4_t, int16x8_t, '3']
+      - [int16x8_t, int16x8_t, '3']
+      - [int32x2_t, int32x4_t, '2']
+      - [int32x4_t, int32x4_t, '2']
+      - [uint16x4_t, uint16x8_t, '3']
+      - [uint16x8_t, uint16x8_t, '3']
+      - [uint32x2_t, uint32x4_t, '2']
+      - [uint32x4_t, uint32x4_t, '2']
     compose:
       - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]]
+            - FnCall: ["vdup{neon_type[0].laneq_nox}", [b], [LANE]]
 
   - name: "vmull{neon_type[1].no}"
     doc: Signed multiply long
@@ -11204,16 +11204,16 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '_lane_f32', '1', '[LANE as u32, LANE as u32]']
-      - [float32x2_t, float32x4_t, '_laneq_f32', '2', '[LANE as u32, LANE as u32]']
-      - [float32x4_t, float32x2_t, 'q_lane_f32', '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [float32x4_t, float32x4_t, 'q_laneq_f32', '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [float32x2_t, float32x2_t, '_lane_f32', '1']
+      - [float32x2_t, float32x4_t, '_laneq_f32', '2']
+      - [float32x4_t, float32x2_t, 'q_lane_f32', '1']
+      - [float32x4_t, float32x4_t, 'q_laneq_f32', '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]]
+            - FnCall: ['vdup{type[2]}', [b], [LANE]]
 
   - name: "vqrdmulh{type[0]}"
     doc: "Vector rounding saturating doubling multiply high by scalar"

From 0d1fd7fa473c7ac801a1bad12f2961820373743f Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 06:37:24 +0530
Subject: [PATCH 37/78] Change implementation of `vld1_dup`

---
 .../crates/core_arch/src/arm_shared/neon/generated.rs  |  8 ++++----
 .../stdarch-gen-arm/spec/neon/arm_shared.spec.yml      | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 2e3a24ee957ef..1d8484e47fe67 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -14634,8 +14634,8 @@ pub fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
-    let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0)));
-    simd_shuffle!(x, x, [0, 0, 0, 0])
+    let x = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0)));
+    vdup_lane_f16::<0>(x)
 }
 #[doc = "Load one single-element structure and replicate to all lanes of one register"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"]
@@ -14653,8 +14653,8 @@ pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t {
-    let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0)));
-    simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
+    let x = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0)));
+    vdupq_laneq_f16::<0>(x)
 }
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index d20c94a2f8cbf..61979ecf0f3e0 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -2835,7 +2835,7 @@ intrinsics:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall: [simd_insert!, [src, "LANE as u32", "*ptr"]]
 
-  - name: "vld1{type[2]}_{neon_type[1]}"
+  - name: "vld1{neon_type[1].dup_nox}"
     doc: "Load one single-element structure and replicate to all lanes of one register"
     arguments: ["ptr: {type[0]}"]
     return_type: "{neon_type[1]}"
@@ -2849,11 +2849,11 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - ["*const f16", float16x4_t, '_dup', 'f16x4', "[0, 0, 0, 0]"]
-      - ["*const f16", float16x8_t, 'q_dup', 'f16x8', "[0, 0, 0, 0, 0, 0, 0, 0]"]
+      - ["*const f16", float16x4_t, '_lane', 'f16x4']
+      - ["*const f16", float16x8_t, 'q_laneq', 'f16x8']
     compose:
-      - Let: [x, "{neon_type[1]}", "vld1{neon_type[1].lane_nox}::<0>(ptr, transmute({type[3]}::splat(0.0)))"]
-      - FnCall: [simd_shuffle!, [x, x, "{type[4]}"]]
+      - Let: [x, {FnCall: ["vld1{neon_type[1].lane_nox}", [ptr, {FnCall: [transmute, ["{type[3]}::splat(0.0)"]]}], [0]]}]
+      - FnCall: ['vdup{type[2]}_{neon_type[1]}', [x], [0]]
 
 
   - name: "vld2{neon_type[1].nox}"

From 95e2c0ff2c7ab1b6d589e1928921b427e6eeece2 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 07:01:28 +0530
Subject: [PATCH 38/78] Change implementation of `v{us,su,}dot_lane`

---
 .../src/arm_shared/neon/generated.rs          | 136 +++++++-----------
 .../spec/neon/arm_shared.spec.yml             |  96 ++++++-------
 2 files changed, 89 insertions(+), 143 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 1d8484e47fe67..476d8e78a21a2 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -9663,11 +9663,9 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
 )]
 pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    let c: int32x2_t = vreinterpret_s32_s8(c);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_s32(a, b, vreinterpret_s8_s32(c))
-    }
+    let c = vreinterpret_s32_s8(c);
+    let c = vdup_lane_s32::<LANE>(c);
+    vdot_s32(a, b, vreinterpret_s8_s32(c))
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
@@ -9690,12 +9688,9 @@ pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) ->
 )]
 pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    let c: int32x2_t = vreinterpret_s32_s8(c);
-    unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_s32(a, b, vreinterpretq_s8_s32(c))
-    }
+    let c = vreinterpret_s32_s8(c);
+    let c = vdupq_lane_s32::<LANE>(c);
+    vdotq_s32(a, b, vreinterpretq_s8_s32(c))
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
@@ -9718,11 +9713,9 @@ pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t)
 )]
 pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    let c: uint32x2_t = vreinterpret_u32_u8(c);
-    unsafe {
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_u32(a, b, vreinterpret_u8_u32(c))
-    }
+    let c = vreinterpret_u32_u8(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    vdot_u32(a, b, vreinterpret_u8_u32(c))
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
@@ -9745,12 +9738,9 @@ pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t)
 )]
 pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    let c: uint32x2_t = vreinterpret_u32_u8(c);
-    unsafe {
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_u32(a, b, vreinterpretq_u8_u32(c))
-    }
+    let c = vreinterpret_u32_u8(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    vdotq_u32(a, b, vreinterpretq_u8_u32(c))
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"]
@@ -9766,11 +9756,9 @@ pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_
 #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
 pub fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    let c: int32x4_t = vreinterpretq_s32_s8(c);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_s32(a, b, vreinterpret_s8_s32(c))
-    }
+    let c = vreinterpretq_s32_s8(c);
+    let c = vdup_laneq_s32::<LANE>(c);
+    vdot_s32(a, b, vreinterpret_s8_s32(c))
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"]
@@ -9786,12 +9774,9 @@ pub fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t)
 #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
 pub fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    let c: int32x4_t = vreinterpretq_s32_s8(c);
-    unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_s32(a, b, vreinterpretq_s8_s32(c))
-    }
+    let c = vreinterpretq_s32_s8(c);
+    let c = vdupq_laneq_s32::<LANE>(c);
+    vdotq_s32(a, b, vreinterpretq_s8_s32(c))
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"]
@@ -9807,11 +9792,9 @@ pub fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t
 #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
 pub fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    let c: uint32x4_t = vreinterpretq_u32_u8(c);
-    unsafe {
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_u32(a, b, transmute(c))
-    }
+    let c = vreinterpretq_u32_u8(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    vdot_u32(a, b, vreinterpret_u8_u32(c))
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"]
@@ -9827,12 +9810,9 @@ pub fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_
 #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
 pub fn vdotq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    let c: uint32x4_t = vreinterpretq_u32_u8(c);
-    unsafe {
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_u32(a, b, transmute(c))
-    }
+    let c = vreinterpretq_u32_u8(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    vdotq_u32(a, b, vreinterpretq_u8_u32(c))
 }
 #[doc = "Dot product arithmetic (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"]
@@ -69646,11 +69626,9 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
 )]
 pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    let c: uint32x2_t = vreinterpret_u32_u8(c);
-    unsafe {
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, vreinterpret_u8_u32(c), b)
-    }
+    let c = vreinterpret_u32_u8(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    vusdot_s32(a, vreinterpret_u8_u32(c), b)
 }
 #[doc = "Dot product index form with signed and unsigned integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
@@ -69677,12 +69655,9 @@ pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t)
 )]
 pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    let c: uint32x2_t = vreinterpret_u32_u8(c);
-    unsafe {
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
-    }
+    let c = vreinterpret_u32_u8(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
 }
 #[doc = "Dot product index form with signed and unsigned integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"]
@@ -69702,11 +69677,9 @@ pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_
 #[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
 pub fn vsudot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: uint32x4_t = transmute(c);
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, transmute(c), b)
-    }
+    let c = vreinterpretq_u32_u8(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    vusdot_s32(a, vreinterpret_u8_u32(c), b)
 }
 #[doc = "Dot product index form with signed and unsigned integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"]
@@ -69726,12 +69699,9 @@ pub fn vsudot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x16_
 #[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
 pub fn vsudotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: uint32x4_t = transmute(c);
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, transmute(c), b)
-    }
+    let c = vreinterpretq_u32_u8(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"]
@@ -71633,11 +71603,9 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    let c: int32x2_t = vreinterpret_s32_s8(c);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, b, vreinterpret_s8_s32(c))
-    }
+    let c = vreinterpret_s32_s8(c);
+    let c = vdup_lane_s32::<LANE>(c);
+    vusdot_s32(a, b, vreinterpret_s8_s32(c))
 }
 #[doc = "Dot product index form with unsigned and signed integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
@@ -71660,12 +71628,9 @@ pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t)
 )]
 pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    let c: int32x2_t = vreinterpret_s32_s8(c);
-    unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
-    }
+    let c = vreinterpret_s32_s8(c);
+    let c = vdupq_lane_s32::<LANE>(c);
+    vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
 }
 #[doc = "Dot product index form with unsigned and signed integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"]
@@ -71681,11 +71646,9 @@ pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_
 #[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
 pub fn vusdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t = transmute(c);
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, b, vreinterpret_s8_s32(c))
-    }
+    let c = vreinterpretq_s32_s8(c);
+    let c = vdup_laneq_s32::<LANE>(c);
+    vusdot_s32(a, b, vreinterpret_s8_s32(c))
 }
 #[doc = "Dot product index form with unsigned and signed integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"]
@@ -71701,12 +71664,9 @@ pub fn vusdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x16_
 #[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
 pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t = transmute(c);
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
-    }
+    let c = vreinterpretq_s32_s8(c);
+    let c = vdupq_laneq_s32::<LANE>(c);
+    vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
 }
 #[doc = "Dot product vector form with unsigned and signed integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 61979ecf0f3e0..1f7e1f6987a5a 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -6159,10 +6159,10 @@ intrinsics:
             - link: "llvm.arm.neon.usdot.v{neon_type[0].lane}i32.v{neon_type[1].lane}i8"
               arch: arm
 
-  - name: "vusdot{type[0]}"
+  - name: "vusdot{neon_type[0].lane_nox}"
     doc: "Dot product index form with unsigned and signed integers"
-    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: int8x8_t"]
-    return_type: "{neon_type[1]}"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: int8x8_t"]
+    return_type: "{neon_type[0]}"
     attr:
       - *neon-i8mm
       - *neon-v8
@@ -6174,19 +6174,17 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]','']
-      - ['q_lane_s32', int32x4_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
+      - [int32x2_t, uint8x8_t, '']
+      - [int32x4_t, uint8x16_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
       - Let:
           - c
-          - int32x2_t
           - FnCall: ['vreinterpret_s32_s8', [c]]
       - Let:
           - c
-          - "{type[1]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
-      - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]]
+          - FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]]
+      - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[2]}_s8_s32', [c]]}]]
 
   - name: "vsudot{neon_type[0].lane_nox}"
     doc: "Dot product index form with signed and unsigned integers"
@@ -6203,22 +6201,20 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,'']
-      - [int32x4_t, int8x16_t, uint8x8_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t,'q']
+      - [int32x2_t, int8x8_t, uint8x8_t, uint32x2_t, '']
+      - [int32x4_t, int8x16_t, uint8x8_t, uint32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
       - Let:
           - c
-          - uint32x2_t
           - FnCall: ['vreinterpret_u32_u8', [c]]
       - Let:
           - c
-          - "{type[4]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
+          - FnCall: ['vdup{neon_type[3].lane_nox}', [c], [LANE]]
       - FnCall: 
           - "vusdot{neon_type[0].no}"
           - - a
-            - FnCall: ['vreinterpret{type[5]}_u8_u32', [c]]
+            - FnCall: ['vreinterpret{type[4]}_u8_u32', [c]]
             - b
 
   - name: "vmul{neon_type[1].no}"
@@ -6979,13 +6975,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]','']
-      - [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
+      - [int32x2_t, uint8x8_t, int8x16_t, '']
+      - [int32x4_t, uint8x16_t, int8x16_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '2']]
-      - Let: [c, int32x4_t, {FnCall: [transmute, [c]]}]
-      - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
-      - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] 
+      - Let: [c, {FnCall: [vreinterpretq_s32_s8, [c]]}]
+      - Let: [c, {FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]]}]
+      - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[3]}_s8_s32', [c]]}]] 
 
   - name: "vsudot{neon_type[0].laneq_nox}"
     doc: "Dot product index form with signed and unsigned integers"
@@ -7001,22 +6997,20 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [int32x2_t, int8x8_t, uint8x16_t, '[LANE as u32, LANE as u32]', uint32x2_t]
-      - [int32x4_t, int8x16_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t]
+      - [int32x2_t, int8x8_t, uint8x16_t, uint32x2_t, '']
+      - [int32x4_t, int8x16_t, uint8x16_t, uint32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
       - Let:
           - c
-          - uint32x4_t
-          - FnCall: [transmute, [c]]
+          - FnCall: [vreinterpretq_u32_u8, [c]]
       - Let:
           - c
-          - "{type[4]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
+          - FnCall: ['vdup{neon_type[3].laneq_nox}', [c], [LANE]]
       - FnCall: 
           - "vusdot{neon_type[0].no}"
           - - a
-            - FnCall: [transmute, [c]]
+            - FnCall: ['vreinterpret{type[4]}_u8_u32', [c]]
             - b
 
   - name: "vdot{neon_type[0].laneq_nox}"
@@ -7033,23 +7027,21 @@ intrinsics:
       - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
     safety: safe
     types:
-      - [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]', '']
-      - [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
+      - [int32x2_t, int8x8_t, int8x16_t, '']
+      - [int32x4_t, int8x16_t, int8x16_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '2']]
       - Let:
           - c
-          - "{neon_type[3]}"
-          - FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
+          - FnCall: [vreinterpretq_s32_s8, [c]]
       - Let:
           - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
+          - FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]]
       - FnCall:
           - "vdot{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
+            - FnCall: ['vreinterpret{type[3]}_s8_s32', [c]]
 
   - name: "vdot{neon_type[0].laneq_nox}"
     doc: Dot product arithmetic (indexed)
@@ -7065,23 +7057,21 @@ intrinsics:
       - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
     safety: safe
     types:
-      - [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]','']
-      - [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
+      - [uint32x2_t, uint8x8_t, uint8x16_t, '']
+      - [uint32x4_t, uint8x16_t, uint8x16_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '2']]
       - Let:
           - c
-          - "{neon_type[3]}"
-          - FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
+          - FnCall: ['vreinterpretq_u32_u8', [c]]
       - Let:
           - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
+          - FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]]
       - FnCall:
           - "vdot{neon_type[0].no}"
           - - a
             - b
-            - FnCall: [transmute, [c]]
+            - FnCall: ['vreinterpret{type[3]}_u8_u32', [c]]
 
   - name: "vdot{neon_type[0].no}"
     doc: Dot product arithmetic (vector)
@@ -7146,23 +7136,21 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]','']
-      - [int32x4_t, int8x16_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
+      - [int32x2_t, int8x8_t, int8x8_t, '']
+      - [int32x4_t, int8x16_t, int8x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
       - Let:
           - c
-          - "{neon_type[3]}"
-          - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
+          - FnCall: ['vreinterpret_s32_s8', [c]]
       - Let:
           - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
+          - FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]]
       - FnCall:
           - "vdot{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
+            - FnCall: ['vreinterpret{type[3]}_s8_s32', [c]]
 
   - name: "vdot{neon_type[0].lane_nox}"
     doc: Dot product arithmetic (indexed)
@@ -7179,23 +7167,21 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]','']
-      - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
+      - [uint32x2_t, uint8x8_t, uint8x8_t, '']
+      - [uint32x4_t, uint8x16_t, uint8x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
       - Let:
           - c
-          - "{neon_type[3]}"
-          - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
+          - FnCall: ['vreinterpret_u32_u8', [c]]
       - Let:
           - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
+          - FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]]
       - FnCall:
           - "vdot{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
+            - FnCall: ['vreinterpret{type[3]}_u8_u32', [c]]
 
   - name: "vmax{neon_type.no}"
     doc: Maximum (vector)

From 63385588deda311a7c1bde381063830c6739e01e Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 09:54:33 +0530
Subject: [PATCH 39/78] Split `vcopyq_lane{q}_p64` due to `vsetq_lane_p64`
 requiring `aes`

---
 .../core_arch/src/aarch64/neon/generated.rs   |  4 +-
 .../spec/neon/aarch64.spec.yml                | 37 ++++++++++++++++++-
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 63ffa6765cec6..fef672cab9acd 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -4618,7 +4618,7 @@ pub fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
     static_assert_uimm_bits!(LANE1, 1);
     static_assert!(LANE2 == 0);
     let b: poly64x2_t = vcombine_p64(b, b);
-    vsetq_lane_p64::<LANE1>(vgetq_lane_p64::<LANE2>(b), a)
+    unsafe { simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8)"]
@@ -5001,7 +5001,7 @@ pub fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(
 ) -> poly64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    vsetq_lane_p64::<LANE1>(vgetq_lane_p64::<LANE2>(b), a)
+    unsafe { simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f64)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 7664bf3c10fa4..401bb504b3a43 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -8856,7 +8856,6 @@ intrinsics:
       - [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1']
       - [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4']
       - [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3']
-      - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1']
       - [float32x4_t, float32x4_t, float32x4_t, '2', '2']
       - [float64x2_t, float64x2_t, float64x2_t, '1', '1']
     compose:
@@ -8864,6 +8863,23 @@ intrinsics:
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
       - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
+  - name: "vcopy{neon_type[0].laneq_nox}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - *neon-stable
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety: safe
+    types:
+      - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
+      - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
+      - FnCall: [simd_insert!, [a, LANE1 as u32, {FnCall: [simd_extract!, [b, LANE2 as u32, p64]]}]]
+
   - name: "vcopy{neon_type[0].laneq_nox}"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
@@ -8902,7 +8918,6 @@ intrinsics:
     types:
       - [int64x2_t, int64x1_t]
       - [uint64x2_t, uint64x1_t]
-      - [poly64x2_t, poly64x1_t]
       - [float64x2_t, float64x1_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '1']]
@@ -8910,6 +8925,24 @@ intrinsics:
       - Let: [b, '{neon_type[0]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}]
       - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[0].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
+  - name: "vcopyq_lane_{neon_type[0]}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - *neon-stable
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety: safe
+    types:
+      - [poly64x2_t, poly64x1_t]
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE1, '1']]
+      - FnCall: [static_assert!, ['LANE2 == 0']]
+      - Let: [b, '{neon_type[0]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}]
+      - FnCall: [simd_insert!, [a, LANE1 as u32, {FnCall: [simd_extract!, [b, LANE2 as u32, p64]]}]]
+
   - name: "vcopyq_lane_f32"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]

From 494200ba488f14ba1a6cd323cb0dd8a781b22906 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui@loongson.cn>
Date: Wed, 6 May 2026 16:27:37 +0800
Subject: [PATCH 40/78] loongarch: Use `intrinsics::simd` for vadda

---
 .../src/loongarch64/lasx/generated.rs         | 36 -------------------
 .../src/loongarch64/lasx/portable.rs          |  4 +++
 .../src/loongarch64/lsx/generated.rs          | 36 -------------------
 .../core_arch/src/loongarch64/lsx/portable.rs |  4 +++
 .../crates/core_arch/src/loongarch64/simd.rs  | 13 +++++++
 .../crates/stdarch-gen-loongarch/lasx.spec    |  4 +++
 .../crates/stdarch-gen-loongarch/lsx.spec     |  4 +++
 .../src/portable-intrinsics.txt               |  8 +++++
 8 files changed, 37 insertions(+), 72 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
index 387a3e564c530..5ef7132637468 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
@@ -91,14 +91,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvsat_wu(a: __v8u32, b: u32) -> __v8u32;
     #[link_name = "llvm.loongarch.lasx.xvsat.du"]
     fn __lasx_xvsat_du(a: __v4u64, b: u32) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvadda.b"]
-    fn __lasx_xvadda_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvadda.h"]
-    fn __lasx_xvadda_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvadda.w"]
-    fn __lasx_xvadda_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvadda.d"]
-    fn __lasx_xvadda_d(a: __v4i64, b: __v4i64) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvavg.b"]
     fn __lasx_xvavg_b(a: __v32i8, b: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvavg.h"]
@@ -1445,34 +1437,6 @@ pub fn lasx_xvsat_du<const IMM6: u32>(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvsat_du(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadda_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadda_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadda_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadda_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadda_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadda_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadda_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadda_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
index 147d94fe21abc..366deda0c17c6 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
@@ -148,6 +148,10 @@ impl_vvv!("lasx", lasx_xvssub_bu, is::simd_saturating_sub, m256i, u8x32);
 impl_vvv!("lasx", lasx_xvssub_hu, is::simd_saturating_sub, m256i, u16x16);
 impl_vvv!("lasx", lasx_xvssub_wu, is::simd_saturating_sub, m256i, u32x8);
 impl_vvv!("lasx", lasx_xvssub_du, is::simd_saturating_sub, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvadda_b, ls::simd_adda, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvadda_h, ls::simd_adda, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvadda_w, ls::simd_adda, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvadda_d, ls::simd_adda, m256i, i64x4);
 
 impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32);
 impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16);
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
index 4f9a022c1de4e..1df46a43fedf9 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
@@ -91,14 +91,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vsat_wu(a: __v4u32, b: u32) -> __v4u32;
     #[link_name = "llvm.loongarch.lsx.vsat.du"]
     fn __lsx_vsat_du(a: __v2u64, b: u32) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vadda.b"]
-    fn __lsx_vadda_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vadda.h"]
-    fn __lsx_vadda_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vadda.w"]
-    fn __lsx_vadda_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vadda.d"]
-    fn __lsx_vadda_d(a: __v2i64, b: __v2i64) -> __v2i64;
     #[link_name = "llvm.loongarch.lsx.vavg.b"]
     fn __lsx_vavg_b(a: __v16i8, b: __v16i8) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vavg.h"]
@@ -1357,34 +1349,6 @@ pub fn lsx_vsat_du<const IMM6: u32>(a: m128i) -> m128i {
     unsafe { transmute(__lsx_vsat_du(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadda_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadda_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadda_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadda_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadda_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadda_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadda_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadda_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
index e98dcf050f56d..55d349142ec86 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
@@ -148,6 +148,10 @@ impl_vvv!("lsx", lsx_vssub_bu, is::simd_saturating_sub, m128i, u8x16);
 impl_vvv!("lsx", lsx_vssub_hu, is::simd_saturating_sub, m128i, u16x8);
 impl_vvv!("lsx", lsx_vssub_wu, is::simd_saturating_sub, m128i, u32x4);
 impl_vvv!("lsx", lsx_vssub_du, is::simd_saturating_sub, m128i, u64x2);
+impl_vvv!("lsx", lsx_vadda_b, ls::simd_adda, m128i, i8x16);
+impl_vvv!("lsx", lsx_vadda_h, ls::simd_adda, m128i, i16x8);
+impl_vvv!("lsx", lsx_vadda_w, ls::simd_adda, m128i, i32x4);
+impl_vvv!("lsx", lsx_vadda_d, ls::simd_adda, m128i, i64x2);
 
 impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16);
 impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8);
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/simd.rs b/library/stdarch/crates/core_arch/src/loongarch64/simd.rs
index 6f573f56d690f..ccad4b8c48729 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/simd.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/simd.rs
@@ -45,6 +45,19 @@ impl_simd_ext!(i64x4, i64);
 impl_simd_ext!(u64x2, u64);
 impl_simd_ext!(u64x4, u64);
 
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const unsafe fn simd_abs<T: Copy + const SimdExt>(a: T) -> T {
+    let m: T = is::simd_lt(a, ls::simd_splat(0));
+    is::simd_select(m, is::simd_neg(a), a)
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const unsafe fn simd_adda<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    is::simd_add(ls::simd_abs(a), ls::simd_abs(b))
+}
+
 #[inline(always)]
 #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
 pub(super) const unsafe fn simd_andn<T: Copy + const SimdExt>(a: T, b: T) -> T {
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
index 27362d3259f9c..07f2ed49d3a68 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
+++ b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
@@ -924,21 +924,25 @@ asm-fmts = xd, xj, ui6
 data-types = UV4DI, UV4DI, UQI
 
 /// lasx_xvadda_b
+impl = portable
 name = lasx_xvadda_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvadda_h
+impl = portable
 name = lasx_xvadda_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvadda_w
+impl = portable
 name = lasx_xvadda_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvadda_d
+impl = portable
 name = lasx_xvadda_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
index 9b1b7cc2457d7..0b32dd83c72ff 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
+++ b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
@@ -924,21 +924,25 @@ asm-fmts = vd, vj, ui6
 data-types = UV2DI, UV2DI, UQI
 
 /// lsx_vadda_b
+impl = portable
 name = lsx_vadda_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vadda_h
+impl = portable
 name = lsx_vadda_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vadda_w
+impl = portable
 name = lsx_vadda_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vadda_d
+impl = portable
 name = lsx_vadda_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
index 423af4cd71a6c..6e3e5fe32e33f 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
+++ b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
@@ -219,6 +219,10 @@ lsx_vssub_bu
 lsx_vssub_hu
 lsx_vssub_wu
 lsx_vssub_du
+lsx_vadda_b
+lsx_vadda_h
+lsx_vadda_w
+lsx_vadda_d
 
 # LASX intrinsics
 lasx_xvsll_b
@@ -435,3 +439,7 @@ lasx_xvssub_bu
 lasx_xvssub_hu
 lasx_xvssub_wu
 lasx_xvssub_du
+lasx_xvadda_b
+lasx_xvadda_h
+lasx_xvadda_w
+lasx_xvadda_d

From 7012f652b489ec43cc9ca737534c6b0ed6f6a7cc Mon Sep 17 00:00:00 2001
From: David Wood <david.wood2@arm.com>
Date: Mon, 20 Apr 2026 14:21:22 +0100
Subject: [PATCH 41/78] gen-arm: split `vreinterpret` defn into multiple

This enables the `big_endian_inverse` transformation to apply to some
of these intrinsics only when appropriate.
---
 .../core_arch/src/aarch64/neon/generated.rs   |  510 +-
 .../src/arm_shared/neon/generated.rs          | 6154 ++++++-----------
 .../spec/neon/aarch64.spec.yml                |   37 +-
 .../spec/neon/arm_shared.spec.yml             |  200 +-
 4 files changed, 2541 insertions(+), 4360 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index fef672cab9acd..7ffeb5e639bd4 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -17933,6 +17933,186 @@ pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
 #[inline]
 #[cfg(target_endian = "little")]
@@ -18142,15 +18322,6 @@ pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
@@ -18220,15 +18391,6 @@ pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
@@ -18275,15 +18437,6 @@ pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
@@ -18407,30 +18560,6 @@ pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
@@ -18507,30 +18636,6 @@ pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
@@ -18583,30 +18688,6 @@ pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
@@ -18749,72 +18830,6 @@ pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
@@ -18957,72 +18972,6 @@ pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
-    unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
-    unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
@@ -19141,33 +19090,6 @@ pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
@@ -19191,78 +19113,6 @@ pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
 #[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"]
 #[inline]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 476d8e78a21a2..968206fc5ef4f 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -39123,7 +39123,7 @@ pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39142,11 +39142,11 @@ pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
+pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39165,15 +39165,15 @@ pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
+pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
     unsafe {
         let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39192,11 +39192,11 @@ pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
+pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39215,15 +39215,14 @@ pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
+pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
     unsafe {
         let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39242,11 +39241,11 @@ pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
+pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39265,14 +39264,15 @@ pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
+pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
     unsafe {
         let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39291,11 +39291,11 @@ pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
+pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39314,15 +39314,15 @@ pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
+pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
     unsafe {
         let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39341,11 +39341,11 @@ pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
+pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39364,15 +39364,14 @@ pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
+pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
     unsafe {
         let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39391,11 +39390,11 @@ pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
+pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39414,15 +39413,15 @@ pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
+pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
     unsafe {
         let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39441,11 +39440,11 @@ pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
+pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39464,14 +39463,15 @@ pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
+pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
     unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39490,11 +39490,11 @@ pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
+pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39513,15 +39513,19 @@ pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
+pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
     unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39540,11 +39544,11 @@ pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
+pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39563,15 +39567,15 @@ pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
+pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
     unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39590,11 +39594,11 @@ pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
+pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39613,15 +39617,15 @@ pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
+pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
     unsafe {
         let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39640,11 +39644,11 @@ pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
+pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39663,10 +39667,10 @@ pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
+pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
     unsafe {
         let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
+        let ret_val: uint8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
             ret_val,
@@ -39675,7 +39679,7 @@ pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39694,11 +39698,11 @@ pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
+pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39717,15 +39721,15 @@ pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
+pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
     unsafe {
         let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39744,11 +39748,11 @@ pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
+pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39767,15 +39771,15 @@ pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
+pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
     unsafe {
         let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39794,11 +39798,11 @@ pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
+pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39817,15 +39821,19 @@ pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
+pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
     unsafe {
         let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39844,11 +39852,11 @@ pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
+pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39867,19 +39875,15 @@ pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
+pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
     unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39898,11 +39902,11 @@ pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
+pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39921,15 +39925,15 @@ pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
+pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
     unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39948,11 +39952,11 @@ pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
+pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -39971,15 +39975,15 @@ pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
+pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
     unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -39998,11 +40002,11 @@ pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
+pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40021,15 +40025,16 @@ pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
+pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
     unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40048,11 +40053,11 @@ pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
+pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40071,19 +40076,15 @@ pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
+pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
     unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40102,11 +40103,11 @@ pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
+pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40125,15 +40126,15 @@ pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
+pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
     unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40152,11 +40153,11 @@ pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
+pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40175,15 +40176,14 @@ pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
+pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
     unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: float16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40202,11 +40202,11 @@ pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
+pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40225,15 +40225,15 @@ pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
+pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
     unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: float16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40252,11 +40252,11 @@ pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
+pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40275,15 +40275,15 @@ pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
+pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
     unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: float16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40302,11 +40302,11 @@ pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
+pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40325,16 +40325,16 @@ pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
+pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
     unsafe {
-        let a: int8x16_t =
+        let a: uint8x16_t =
             simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: float16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40353,11 +40353,11 @@ pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
+pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40376,15 +40376,15 @@ pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
+pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: float16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40403,11 +40403,11 @@ pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
+pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40426,15 +40426,15 @@ pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
+pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: float16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40453,11 +40453,11 @@ pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
+pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40476,15 +40476,14 @@ pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
+pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: float16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40503,11 +40502,11 @@ pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
+pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40526,15 +40525,15 @@ pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
+pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: float16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40553,11 +40552,11 @@ pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
+pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40576,14 +40575,15 @@ pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
+pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
     unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: float16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -40602,11 +40602,11 @@ pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
+pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -40625,17 +40625,17 @@ pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
+pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: float16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40652,13 +40652,12 @@ pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
+pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40675,17 +40674,12 @@ pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40702,13 +40696,12 @@ pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
+pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40725,18 +40718,12 @@ pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40753,13 +40740,12 @@ pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
+pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40776,17 +40762,12 @@ pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40803,13 +40784,12 @@ pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
+pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40826,17 +40806,12 @@ pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40853,13 +40828,12 @@ pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
+pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40876,17 +40850,12 @@ pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40903,13 +40872,12 @@ pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
+pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40926,17 +40894,12 @@ pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40946,20 +40909,18 @@ pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
+pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40969,23 +40930,18 @@ pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40995,20 +40951,18 @@ pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
+pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41018,24 +40972,18 @@ pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
-    unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41045,20 +40993,18 @@ pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
+pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41068,1583 +41014,18 @@ pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
-    unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
-    unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42660,13 +41041,12 @@ pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
+pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42682,17 +41062,12 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42708,13 +41083,12 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
+pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42730,17 +41104,12 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42756,13 +41125,12 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
+pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42778,16 +41146,12 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42803,13 +41167,12 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
+pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42825,17 +41188,12 @@ pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42851,13 +41209,12 @@ pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
+pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42873,17 +41230,12 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42899,13 +41251,12 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
+pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42921,17 +41272,12 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42947,13 +41293,12 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
+pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42969,16 +41314,12 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42994,13 +41335,12 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
+pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43016,17 +41356,12 @@ pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43042,13 +41377,12 @@ pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
+pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43064,17 +41398,12 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43090,13 +41419,12 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
+pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43112,18 +41440,12 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43139,13 +41461,12 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
+pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43161,18 +41482,12 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43188,13 +41503,12 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
+pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43210,18 +41524,12 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43237,13 +41545,12 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
+pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43259,18 +41566,12 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43286,13 +41587,12 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
+pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43303,27 +41603,17 @@ pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43339,13 +41629,12 @@ pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
+pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43361,18 +41650,12 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43388,13 +41671,12 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
+pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43410,18 +41692,12 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43437,13 +41713,12 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
+pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43459,20 +41734,15 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43480,21 +41750,22 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43502,30 +41773,25 @@ pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
     unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43533,21 +41799,22 @@ pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43555,26 +41822,25 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
     unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43582,21 +41848,22 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43604,25 +41871,25 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43630,21 +41897,22 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43652,25 +41920,26 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43678,21 +41947,22 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43700,25 +41970,25 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43726,21 +41996,22 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -43748,20 +42019,22 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -43779,11 +42052,11 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
+pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -43801,15 +42074,14 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
+pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -43827,11 +42099,11 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
+pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -43849,15 +42121,15 @@ pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
+pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -43875,11 +42147,11 @@ pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
+pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -43897,15 +42169,15 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
+pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -43923,11 +42195,11 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
+pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -43945,14 +42217,14 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
+pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
         transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -43970,11 +42242,11 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
+pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -43992,15 +42264,15 @@ pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
+pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44018,11 +42290,11 @@ pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
+pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44040,15 +42312,15 @@ pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
+pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44066,11 +42338,11 @@ pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44088,15 +42360,14 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44114,11 +42385,11 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
+pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44136,19 +42407,15 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
+pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44166,11 +42433,11 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
+pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44188,15 +42455,15 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
+pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44214,11 +42481,11 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
+pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44236,15 +42503,14 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
+pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44262,11 +42528,11 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
+pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44284,10 +42550,10 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
+pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
             ret_val,
@@ -44296,7 +42562,7 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44314,11 +42580,11 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
+pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44336,15 +42602,15 @@ pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
+pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44362,11 +42628,11 @@ pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
+pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44384,15 +42650,15 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
+pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44410,11 +42676,11 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
+pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44432,15 +42698,19 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
+pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44458,11 +42728,11 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
+pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44480,19 +42750,15 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
+pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44510,11 +42776,11 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
+pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44532,15 +42798,15 @@ pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
+pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44558,11 +42824,11 @@ pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
+pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44580,15 +42846,19 @@ pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
+pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44606,11 +42876,11 @@ pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
+pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44628,15 +42898,15 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
+pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x8_t = transmute(a);
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44654,11 +42924,11 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
+pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44676,15 +42946,15 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
+pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44702,11 +42972,11 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
+pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44724,14 +42994,15 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
+pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44749,11 +43020,11 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
+pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44771,15 +43042,15 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
+pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44797,11 +43068,11 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
+pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44819,15 +43090,14 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
+pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44845,11 +43115,11 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
+pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44867,15 +43137,15 @@ pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
+pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44893,11 +43163,11 @@ pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
+pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44915,14 +43185,15 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
+pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44940,11 +43211,11 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
+pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44962,15 +43233,14 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
+pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44988,11 +43258,11 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
+pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45010,15 +43280,15 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
+pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: poly16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45036,11 +43306,11 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
+pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45058,15 +43328,14 @@ pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
+pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45084,11 +43353,11 @@ pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
+pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45106,19 +43375,15 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
+pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45136,11 +43401,11 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
+pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45158,15 +43423,16 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
+pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45184,11 +43450,11 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
+pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45206,15 +43472,16 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
+pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45232,11 +43499,11 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
+pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45254,19 +43521,16 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
+pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45284,11 +43548,11 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
+pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45306,15 +43570,16 @@ pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
+pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45332,11 +43597,11 @@ pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
+pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45354,15 +43619,16 @@ pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
+pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45380,11 +43646,11 @@ pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45402,15 +43668,16 @@ pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45428,11 +43695,11 @@ pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
+pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45450,19 +43717,16 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
+pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45480,11 +43744,11 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
+pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45502,15 +43766,16 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
+pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: poly16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45528,11 +43793,11 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45550,14 +43815,16 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     unsafe {
-        let ret_val: float32x2_t = transmute(a);
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45575,11 +43842,11 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
+pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45597,14 +43864,15 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
+pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
     unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45622,11 +43890,11 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
+pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45644,14 +43912,15 @@ pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
+pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45669,11 +43938,11 @@ pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
+pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45691,14 +43960,15 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
+pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
     unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: int32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45716,11 +43986,11 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
+pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45738,14 +44008,14 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
+pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45763,11 +44033,11 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
+pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45785,14 +44055,15 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
+pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45810,11 +44081,11 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
+pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45832,35 +44103,15 @@ pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
+pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: uint32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45878,11 +44129,11 @@ pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45900,14 +44151,14 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45925,11 +44176,11 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45947,14 +44198,15 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -45972,11 +44224,11 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -45994,15 +44246,14 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46020,11 +44271,11 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46042,19 +44293,14 @@ pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46072,11 +44318,11 @@ pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
+pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46094,15 +44340,15 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
+pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46120,11 +44366,11 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
+pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46142,15 +44388,19 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
+pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46168,11 +44418,11 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
+pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46190,19 +44440,15 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
+pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46220,11 +44466,11 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
+pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46242,15 +44488,15 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
+pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46268,11 +44514,11 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
+pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46290,15 +44536,19 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
+pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46316,11 +44566,11 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46338,15 +44588,15 @@ pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46364,11 +44614,11 @@ pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
+pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46386,19 +44636,15 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
+pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46416,11 +44662,11 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46438,15 +44684,19 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46464,11 +44714,11 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46486,15 +44736,15 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46512,11 +44762,11 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
+pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46534,15 +44784,15 @@ pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
+pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46560,11 +44810,11 @@ pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
+pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46582,15 +44832,15 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
+pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46608,11 +44858,11 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46630,15 +44880,14 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46656,11 +44905,11 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
+pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46678,14 +44927,15 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
+pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46703,11 +44953,11 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46725,15 +44975,15 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: uint16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46751,11 +45001,11 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46773,15 +45023,14 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46799,11 +45048,11 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
+pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46821,14 +45070,15 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
+pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46846,11 +45096,11 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
+pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46868,15 +45118,15 @@ pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
+pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46894,11 +45144,11 @@ pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46916,15 +45166,14 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46942,11 +45191,11 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
+pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46964,16 +45213,14 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
+pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46991,11 +45238,11 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
+pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47013,10 +45260,9 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
+pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: int8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
@@ -47026,7 +45272,7 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47044,11 +45290,11 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
+pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47066,16 +45312,15 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
+pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: int16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47093,11 +45338,11 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
+pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47115,16 +45360,15 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
+pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47142,11 +45386,11 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
+pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47164,16 +45408,19 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
+pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47191,11 +45438,11 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
+pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47213,16 +45460,15 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
+pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: uint16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47240,11 +45486,11 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
+pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47262,16 +45508,15 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
+pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47289,11 +45534,11 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
+pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47311,16 +45556,19 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
+pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47338,11 +45586,11 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
+pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47360,20 +45608,15 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
+pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47391,11 +45634,11 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
+pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47413,16 +45656,15 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
+pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47440,11 +45682,11 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
+pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47462,15 +45704,14 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
+pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: float32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47488,11 +45729,11 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
+pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47510,15 +45751,14 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
+pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: int8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47536,11 +45776,11 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
+pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47558,15 +45798,14 @@ pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
+pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: int16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47584,11 +45823,11 @@ pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
+pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47606,15 +45845,14 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
+pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: int32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47632,11 +45870,11 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
+pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47654,14 +45892,14 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
+pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47679,11 +45917,11 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
+pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47701,15 +45939,14 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
+pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47727,11 +45964,11 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
+pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47749,15 +45986,14 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
+pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: uint32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47775,11 +46011,11 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47797,14 +46033,14 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47822,11 +46058,11 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
+pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47844,15 +46080,14 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
+pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47870,11 +46105,11 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47892,15 +46127,14 @@ pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47918,11 +46152,11 @@ pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
+pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47940,15 +46174,15 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
+pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: float32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47966,11 +46200,11 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
+pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47988,9 +46222,9 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
+pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
@@ -48000,7 +46234,7 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48018,11 +46252,11 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
+pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48040,15 +46274,15 @@ pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
+pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48066,11 +46300,11 @@ pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
+pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48088,15 +46322,15 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
+pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48114,11 +46348,11 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
+pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48136,15 +46370,19 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
+pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48162,11 +46400,11 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
+pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48184,19 +46422,15 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
+pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48214,11 +46448,11 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
+pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48236,15 +46470,15 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
+pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: uint32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48262,11 +46496,11 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
+pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48284,15 +46518,19 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
+pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48310,11 +46548,59 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
+pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48332,19 +46618,15 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
+pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48362,11 +46644,11 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
+pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48384,15 +46666,15 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
+pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48410,11 +46692,11 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
+pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48432,15 +46714,15 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
+pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float32x2_t = transmute(a);
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48458,11 +46740,11 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
+pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48480,15 +46762,14 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
+pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48506,11 +46787,11 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
+pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48528,15 +46809,15 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
+pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x4_t = transmute(a);
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48554,11 +46835,11 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
+pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48576,15 +46857,15 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
+pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int32x2_t = transmute(a);
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48602,11 +46883,11 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
+pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48624,14 +46905,14 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
+pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
         transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48649,11 +46930,11 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48671,15 +46952,15 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48697,11 +46978,11 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
+pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48719,15 +47000,14 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
+pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48745,11 +47025,11 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48767,14 +47047,15 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
         transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48792,11 +47073,11 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
+pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48814,15 +47095,16 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
+pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48840,11 +47122,11 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48862,15 +47144,16 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48888,11 +47171,11 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
+pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48910,15 +47193,16 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
+pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48936,11 +47220,11 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
+pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48958,19 +47242,16 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
+pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48988,11 +47269,11 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
+pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49010,15 +47291,16 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
+pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49036,11 +47318,11 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49058,15 +47340,16 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49084,11 +47367,11 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
+pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49106,15 +47389,16 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
+pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49132,11 +47416,11 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49154,19 +47438,16 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49184,11 +47465,11 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
+pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49206,15 +47487,16 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
+pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49232,11 +47514,11 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49254,15 +47536,15 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49280,11 +47562,11 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49302,19 +47584,15 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49332,11 +47610,11 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49354,15 +47632,15 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49380,11 +47658,11 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49402,14 +47680,14 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49427,11 +47705,11 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49449,14 +47727,15 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     unsafe {
-        let ret_val: int8x8_t = transmute(a);
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49474,11 +47753,11 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49496,14 +47775,15 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49521,11 +47801,11 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49543,15 +47823,16 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -49567,11 +47848,37 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
+pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49589,11 +47896,11 @@ pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49611,14 +47918,14 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49636,11 +47943,11 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
+pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49658,14 +47965,14 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
+pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49683,11 +47990,11 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49705,14 +48012,15 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49730,11 +48038,11 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
+pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49752,14 +48060,19 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
+pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49777,11 +48090,11 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49799,14 +48112,15 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49824,11 +48138,11 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49846,15 +48160,15 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49872,11 +48186,11 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49894,10 +48208,10 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x16_t = transmute(a);
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
             ret_val,
@@ -49906,7 +48220,7 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49924,11 +48238,11 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
+pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49946,15 +48260,15 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
+pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49972,11 +48286,11 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
+pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49994,15 +48308,15 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
+pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50020,11 +48334,11 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
+pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50042,15 +48356,19 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
+pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50068,11 +48386,11 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50090,19 +48408,15 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50120,11 +48434,11 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
+pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50142,15 +48456,15 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
+pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50168,11 +48482,11 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50190,15 +48504,15 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50216,11 +48530,11 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
+pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50238,19 +48552,14 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
+pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50268,11 +48577,11 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50290,15 +48599,15 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50316,11 +48625,11 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
+pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50338,15 +48647,15 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
+pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50364,11 +48673,11 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
+pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50386,15 +48695,14 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
+pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50412,11 +48720,11 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
+pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50434,15 +48742,15 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
+pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50460,11 +48768,11 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50482,15 +48790,15 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50508,11 +48816,11 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
+pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50530,14 +48838,14 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
+pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
         transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50555,11 +48863,11 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
+pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50577,15 +48885,14 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
+pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50603,11 +48910,11 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
+pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50625,15 +48932,19 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
+pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50651,11 +48962,11 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50673,15 +48984,15 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50699,11 +49010,11 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
+pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50721,14 +49032,15 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
+pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50746,11 +49058,11 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
+pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50768,15 +49080,19 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
+pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50794,11 +49110,11 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
+pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50816,16 +49132,15 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
+pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50843,11 +49158,11 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
+pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50865,20 +49180,15 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
+pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50896,11 +49206,11 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
+pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50918,16 +49228,19 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
+pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50945,11 +49258,11 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50967,16 +49280,15 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50994,11 +49306,11 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
+pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51016,16 +49328,15 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
+pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51043,11 +49354,11 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
+pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51065,20 +49376,14 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
+pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51096,11 +49401,11 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
+pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51118,16 +49423,14 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
+pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
+        let ret_val: int8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51145,11 +49448,11 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51167,16 +49470,14 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
+        let ret_val: int16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51194,11 +49495,11 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
+pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51216,16 +49517,14 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
+pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
+        let ret_val: int32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51243,11 +49542,11 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
+pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51265,16 +49564,14 @@ pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
+pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
+        let ret_val: uint8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51292,11 +49589,11 @@ pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51314,15 +49611,14 @@ pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51340,11 +49636,11 @@ pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
+pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51362,15 +49658,14 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
+pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51388,11 +49683,11 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
+pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51410,15 +49705,14 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
+pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51436,11 +49730,11 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
+pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51458,15 +49752,14 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
+pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51484,11 +49777,11 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
+pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51506,14 +49799,14 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
+pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
         transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51531,11 +49824,11 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
+pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51553,15 +49846,15 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
+pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51579,11 +49872,11 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
+pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51601,15 +49894,19 @@ pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
+pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51627,11 +49924,11 @@ pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
+pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51649,15 +49946,15 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
+pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51675,11 +49972,11 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
+pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51697,14 +49994,15 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
+pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51722,11 +50020,11 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
+pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51744,15 +50042,19 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
+pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51770,11 +50072,11 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
+pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51792,15 +50094,15 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
+pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51818,11 +50120,11 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
+pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51840,19 +50142,15 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
+pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51870,11 +50168,11 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
+pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51892,15 +50190,19 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
+pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51918,11 +50220,11 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
+pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51940,15 +50242,15 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
+pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51966,11 +50268,11 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
+pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51988,15 +50290,15 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
+pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52014,11 +50316,11 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
+pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52036,19 +50338,15 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
+pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52066,11 +50364,11 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52088,15 +50386,15 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52114,11 +50412,11 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
+pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52136,15 +50434,14 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
+pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52162,11 +50459,11 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
+pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52184,15 +50481,15 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
+pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52210,11 +50507,11 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
+pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52232,23 +50529,19 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
+pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52262,15 +50555,15 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
+pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52284,22 +50577,18 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
+pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52313,15 +50602,15 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
+pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52335,18 +50624,19 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
+pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52360,15 +50650,15 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
+pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52382,18 +50672,18 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
+pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52407,15 +50697,15 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
+pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52429,18 +50719,19 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
+pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52454,15 +50745,15 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
+pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52473,25 +50764,23 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52505,15 +50794,15 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52527,18 +50816,20 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52552,15 +50843,15 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
+pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52574,18 +50865,20 @@ pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
+pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52599,15 +50892,15 @@ pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
+pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52621,18 +50914,20 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
+pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52646,15 +50941,15 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
+pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52668,22 +50963,20 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
+pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52697,15 +50990,15 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
+pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52719,18 +51012,20 @@ pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
+pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52744,15 +51039,15 @@ pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
+pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52766,18 +51061,20 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
+pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52791,15 +51088,15 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
+pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52813,18 +51110,20 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
+pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52838,15 +51137,15 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
+pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52860,19 +51159,20 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
+pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     unsafe {
-        let a: int8x16_t =
+        let a: poly8x16_t =
             simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52886,15 +51186,15 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
+pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52908,20 +51208,19 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
+pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52935,15 +51234,15 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
+pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52957,18 +51256,19 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
+pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -52982,15 +51282,15 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
+pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53004,18 +51304,19 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
+pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53029,15 +51330,15 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
+pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53051,19 +51352,18 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
+pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53077,15 +51377,15 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
+pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53099,18 +51399,19 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
+pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53124,15 +51425,15 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
+pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53146,18 +51447,19 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
+pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53171,15 +51473,15 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
+pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53193,19 +51495,18 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
+pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53219,15 +51520,15 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
+pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53241,18 +51542,19 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
+pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53266,15 +51568,15 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
+pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53288,18 +51590,18 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
+pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53313,15 +51615,15 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
+pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53335,19 +51637,18 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
+pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
         transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53361,15 +51662,15 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
+pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53383,20 +51684,19 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
+pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53410,15 +51710,15 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
+pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53432,18 +51732,23 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
+pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53457,15 +51762,15 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
+pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53479,18 +51784,19 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
+pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53504,15 +51810,15 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
+pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53526,19 +51832,19 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
+pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53552,15 +51858,15 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
+pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53574,18 +51880,23 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
+pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53599,15 +51910,15 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
+pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53621,18 +51932,19 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
+pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53646,15 +51958,15 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
+pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53668,19 +51980,19 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
+pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53694,15 +52006,15 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
+pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53716,18 +52028,23 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
+pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53741,15 +52058,15 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
+pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53763,18 +52080,19 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
+pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53788,15 +52106,15 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
+pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53810,19 +52128,18 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
+pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53836,15 +52153,15 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
+pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53858,20 +52175,18 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
+pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53885,15 +52200,15 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
+pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53907,18 +52222,18 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
+pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53932,15 +52247,15 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
+pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53954,18 +52269,18 @@ pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
+pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -53979,15 +52294,15 @@ pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
+pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54001,19 +52316,18 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
+pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54027,15 +52341,15 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
+pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54049,18 +52363,18 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
+pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54074,15 +52388,15 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
+pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54096,18 +52410,18 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
+pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54121,15 +52435,15 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
+pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54143,18 +52457,18 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
+pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54168,15 +52482,15 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
+pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54190,18 +52504,23 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
+pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54215,15 +52534,15 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
+pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54237,18 +52556,19 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
+pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54262,15 +52582,15 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
+pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54284,18 +52604,19 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
+pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54309,15 +52630,15 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
+pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54331,18 +52652,23 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
+pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54356,15 +52682,15 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
+pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54378,18 +52704,19 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
+pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54403,15 +52730,15 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
+pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54425,18 +52752,19 @@ pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
+pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     unsafe {
         let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54450,15 +52778,15 @@ pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
+pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54472,10 +52800,10 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
+pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     unsafe {
         let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x16_t = transmute(a);
+        let ret_val: poly8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
             ret_val,
@@ -54484,11 +52812,11 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54502,15 +52830,15 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
+pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54524,17 +52852,16 @@ pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
+pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
     unsafe {
         let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x8_t = transmute(a);
+        let ret_val: poly16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54550,13 +52877,12 @@ pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
+pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54572,17 +52898,12 @@ pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54598,13 +52919,12 @@ pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
+pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54620,21 +52940,12 @@ pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54650,13 +52961,12 @@ pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
+pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54672,17 +52982,12 @@ pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54698,13 +53003,12 @@ pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
+pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54720,17 +53024,12 @@ pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54746,13 +53045,12 @@ pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
+pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54768,21 +53066,12 @@ pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54798,13 +53087,12 @@ pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
+pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -54820,12 +53108,8 @@ pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
+    unsafe { transmute(a) }
 }
 #[doc = "Reversing vector elements (swap endianness)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 401bb504b3a43..0a466dda713e0 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -8523,7 +8523,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.frecpx.{type[1]}"
               arch: aarch64,arm64ec
 
-
   - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
     doc: Vector reinterpret cast operation
     arguments: ["a: {type[0]}"]
@@ -8531,6 +8530,7 @@ intrinsics:
     attr: [*neon-stable]
     assert_instr: [nop]
     safety: safe
+    big_endian_inverse: false
     types:
       - [poly64x1_t, int64x1_t]
       - [poly64x1_t, uint64x1_t]
@@ -8540,54 +8540,65 @@ intrinsics:
       - [poly64x2_t, uint64x2_t]
       - [int64x2_t, poly64x2_t]
       - [uint64x2_t, poly64x2_t]
+      - [float64x1_t, int64x1_t]
+      - [float64x2_t, int64x2_t]
+      - [float64x1_t, uint64x1_t]
+      - [float64x2_t, uint64x2_t]
+      - [float64x1_t, poly64x1_t]
+      - [float64x2_t, poly64x2_t]
+      - [int64x1_t, float64x1_t]
+      - [int64x2_t, float64x2_t]
+      - [uint64x1_t, float64x1_t]
+      - [uint64x2_t, float64x2_t]
+      - [poly64x1_t, float64x1_t]
+      - [poly64x2_t, float64x2_t]
+    compose:
+      - FnCall: [transmute, [a]]
+
+  - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
+    doc: Vector reinterpret cast operation
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr: [*neon-stable]
+    assert_instr: [nop]
+    safety: safe
+    types:
       - [float64x1_t, int8x8_t]
       - [float64x1_t, int16x4_t]
       - [float64x1_t, int32x2_t]
-      - [float64x1_t, int64x1_t]
       - [float64x2_t, int8x16_t]
       - [float64x2_t, int16x8_t]
       - [float64x2_t, int32x4_t]
-      - [float64x2_t, int64x2_t]
       - [float64x1_t, uint8x8_t]
       - [float64x1_t, uint16x4_t]
       - [float64x1_t, uint32x2_t]
-      - [float64x1_t, uint64x1_t]
       - [float64x2_t, uint8x16_t]
       - [float64x2_t, uint16x8_t]
       - [float64x2_t, uint32x4_t]
-      - [float64x2_t, uint64x2_t]
       - [float64x1_t, poly8x8_t]
       - [float64x1_t, poly16x4_t]
       - [float32x2_t, poly64x1_t]
-      - [float64x1_t, poly64x1_t]
       - [float64x2_t, poly8x16_t]
       - [float64x2_t, poly16x8_t]
       - [float32x4_t, poly64x2_t]
-      - [float64x2_t, poly64x2_t]
       - [float64x2_t, p128]
       - [int8x8_t, float64x1_t]
       - [int16x4_t, float64x1_t]
       - [int32x2_t, float64x1_t]
-      - [int64x1_t, float64x1_t]
       - [int8x16_t, float64x2_t]
       - [int16x8_t, float64x2_t]
       - [int32x4_t, float64x2_t]
-      - [int64x2_t, float64x2_t]
       - [poly8x8_t, float64x1_t]
       - [uint16x4_t, float64x1_t]
       - [uint32x2_t, float64x1_t]
-      - [uint64x1_t, float64x1_t]
       - [poly8x16_t, float64x2_t]
       - [uint16x8_t, float64x2_t]
       - [uint32x4_t, float64x2_t]
-      - [uint64x2_t, float64x2_t]
       - [uint8x8_t, float64x1_t]
       - [poly16x4_t, float64x1_t]
-      - [poly64x1_t, float64x1_t]
       - [poly64x1_t, float32x2_t]
       - [uint8x16_t, float64x2_t]
       - [poly16x8_t, float64x2_t]
-      - [poly64x2_t, float64x2_t]
       - [poly64x2_t, float32x4_t]
       - [p128, float64x2_t]
       - [float32x2_t, float64x1_t]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 1f7e1f6987a5a..b6683080348a0 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -8476,33 +8476,48 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: false
     types:
-      - [poly64x1_t, int32x2_t]
-      - [poly64x1_t, uint32x2_t]
-      - [poly64x2_t, int32x4_t]
-      - [poly64x2_t, uint32x4_t]
       - [p128, int64x2_t]
       - [p128, uint64x2_t]
       - [p128, poly64x2_t]
-      - [poly8x16_t, p128]
       - [p128, int8x16_t]
       - [p128, uint8x16_t]
       - [p128, poly8x16_t]
+      - [poly64x2_t, p128]
+      - [p128, int32x4_t]
+      - [p128, uint32x4_t]
+      - [p128, int16x8_t]
+      - [p128, uint16x8_t]
+      - [p128, poly16x8_t]
+    compose:
+      - FnCall: [transmute, [a]]
+
+  - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
+    doc: Vector reinterpret cast operation
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
+      - *neon-not-arm-stable
+      - *neon-cfg-arm-unstable
+    safety: safe
+    types:
+      - [poly64x1_t, int32x2_t]
+      - [poly64x1_t, uint32x2_t]
+      - [poly8x16_t, p128]
       - [int32x2_t, poly64x1_t]
       - [uint32x2_t, poly64x1_t]
       - [int32x4_t, poly64x2_t]
       - [uint32x4_t, poly64x2_t]
       - [int64x2_t, p128]
       - [uint64x2_t, p128]
-      - [poly64x2_t, p128]
       - [poly64x1_t, int16x4_t]
       - [poly64x1_t, uint16x4_t]
       - [poly64x1_t, poly16x4_t]
-      - [poly64x2_t, int16x8_t]
-      - [poly64x2_t, uint16x8_t]
       - [poly64x2_t, poly16x8_t]
-      - [p128, int32x4_t]
-      - [p128, uint32x4_t]
       - [poly16x4_t, poly64x1_t]
       - [int16x4_t, poly64x1_t]
       - [uint16x4_t, poly64x1_t]
@@ -8514,12 +8529,7 @@ intrinsics:
       - [poly64x1_t, int8x8_t]
       - [poly64x1_t, uint8x8_t]
       - [poly64x1_t, poly8x8_t]
-      - [poly64x2_t, int8x16_t]
-      - [poly64x2_t, uint8x16_t]
       - [poly64x2_t, poly8x16_t]
-      - [p128, int16x8_t]
-      - [p128, uint16x8_t]
-      - [p128, poly16x8_t]
       - [poly8x8_t, poly64x1_t]
       - [int8x8_t, poly64x1_t]
       - [uint8x8_t, poly64x1_t]
@@ -8531,53 +8541,12 @@ intrinsics:
       - [poly16x8_t, p128]
       - [int8x16_t, p128]
       - [uint8x16_t, p128]
-    compose:
-      - FnCall: [transmute, [a]]
-
-  - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
-    doc: Vector reinterpret cast operation
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - *neon-v7
-      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
-      - *neon-not-arm-stable
-      - *neon-cfg-arm-unstable
-    safety: safe
-    types:
-      - [uint8x8_t, int8x8_t]
-      - [poly8x8_t, int8x8_t]
-      - [poly16x4_t, int16x4_t]
-      - [uint16x4_t, int16x4_t]
-      - [uint32x2_t, int32x2_t]
-      - [uint64x1_t, int64x1_t]
-      - [uint8x16_t, int8x16_t]
-      - [poly8x16_t, int8x16_t]
-      - [poly16x8_t, int16x8_t]
-      - [uint16x8_t, int16x8_t]
-      - [uint32x4_t, int32x4_t]
-      - [uint64x2_t, int64x2_t]
-      - [poly8x8_t, uint8x8_t]
-      - [int8x8_t, uint8x8_t]
-      - [poly16x4_t, uint16x4_t]
-      - [int16x4_t, uint16x4_t]
-      - [int32x2_t, uint32x2_t]
-      - [int64x1_t, uint64x1_t]
-      - [poly8x16_t, uint8x16_t]
-      - [int8x16_t, uint8x16_t]
-      - [poly16x8_t, uint16x8_t]
-      - [int16x8_t, uint16x8_t]
-      - [int32x4_t, uint32x4_t]
-      - [int64x2_t, uint64x2_t]
-      - [int8x8_t, poly8x8_t]
-      - [uint8x8_t, poly8x8_t]
-      - [int16x4_t, poly16x4_t]
-      - [uint16x4_t, poly16x4_t]
-      - [int8x16_t, poly8x16_t]
-      - [uint8x16_t, poly8x16_t]
-      - [int16x8_t, poly16x8_t]
-      - [uint16x8_t, poly16x8_t]
+      - [poly64x2_t, int32x4_t]
+      - [poly64x2_t, uint32x4_t]
+      - [poly64x2_t, int16x8_t]
+      - [poly64x2_t, uint16x8_t]
+      - [poly64x2_t, int8x16_t]
+      - [poly64x2_t, uint8x16_t]
       - [int16x4_t, int8x8_t]
       - [uint16x4_t, int8x8_t]
       - [poly16x4_t, int8x8_t]
@@ -8728,19 +8697,15 @@ intrinsics:
       - [uint8x16_t, uint64x2_t]
       - [float32x2_t, int8x8_t]
       - [float32x2_t, int16x4_t]
-      - [float32x2_t, int32x2_t]
       - [float32x2_t, int64x1_t]
       - [float32x4_t, int8x16_t]
       - [float32x4_t, int16x8_t]
-      - [float32x4_t, int32x4_t]
       - [float32x4_t, int64x2_t]
       - [float32x2_t, uint8x8_t]
       - [float32x2_t, uint16x4_t]
-      - [float32x2_t, uint32x2_t]
       - [float32x2_t, uint64x1_t]
       - [float32x4_t, uint8x16_t]
       - [float32x4_t, uint16x8_t]
-      - [float32x4_t, uint32x4_t]
       - [float32x4_t, uint64x2_t]
       - [float32x2_t, poly8x8_t]
       - [float32x2_t, poly16x4_t]
@@ -8749,19 +8714,15 @@ intrinsics:
       - [float32x4_t, p128]
       - [int8x8_t, float32x2_t]
       - [int16x4_t, float32x2_t]
-      - [int32x2_t, float32x2_t]
       - [int64x1_t, float32x2_t]
       - [int8x16_t, float32x4_t]
       - [int16x8_t, float32x4_t]
-      - [int32x4_t, float32x4_t]
       - [int64x2_t, float32x4_t]
       - [uint8x8_t, float32x2_t]
       - [uint16x4_t, float32x2_t]
-      - [uint32x2_t, float32x2_t]
       - [uint64x1_t, float32x2_t]
       - [uint8x16_t, float32x4_t]
       - [uint16x8_t, float32x4_t]
-      - [uint32x4_t, float32x4_t]
       - [uint64x2_t, float32x4_t]
       - [poly8x8_t, float32x2_t]
       - [poly16x4_t, float32x2_t]
@@ -8771,6 +8732,61 @@ intrinsics:
     compose:
       - FnCall: [transmute, [a]]
 
+  - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
+    doc: Vector reinterpret cast operation
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
+      - *neon-not-arm-stable
+      - *neon-cfg-arm-unstable
+    safety: safe
+    big_endian_inverse: false
+    types:
+      - [uint8x8_t, int8x8_t]
+      - [poly8x8_t, int8x8_t]
+      - [poly16x4_t, int16x4_t]
+      - [uint16x4_t, int16x4_t]
+      - [uint32x2_t, int32x2_t]
+      - [uint64x1_t, int64x1_t]
+      - [uint8x16_t, int8x16_t]
+      - [poly8x16_t, int8x16_t]
+      - [poly16x8_t, int16x8_t]
+      - [uint16x8_t, int16x8_t]
+      - [uint32x4_t, int32x4_t]
+      - [uint64x2_t, int64x2_t]
+      - [poly8x8_t, uint8x8_t]
+      - [int8x8_t, uint8x8_t]
+      - [poly16x4_t, uint16x4_t]
+      - [int16x4_t, uint16x4_t]
+      - [int32x2_t, uint32x2_t]
+      - [int64x1_t, uint64x1_t]
+      - [poly8x16_t, uint8x16_t]
+      - [int8x16_t, uint8x16_t]
+      - [poly16x8_t, uint16x8_t]
+      - [int16x8_t, uint16x8_t]
+      - [int32x4_t, uint32x4_t]
+      - [int64x2_t, uint64x2_t]
+      - [int8x8_t, poly8x8_t]
+      - [uint8x8_t, poly8x8_t]
+      - [int16x4_t, poly16x4_t]
+      - [uint16x4_t, poly16x4_t]
+      - [int8x16_t, poly8x16_t]
+      - [uint8x16_t, poly8x16_t]
+      - [int16x8_t, poly16x8_t]
+      - [uint16x8_t, poly16x8_t]
+      - [float32x2_t, int32x2_t]
+      - [float32x4_t, int32x4_t]
+      - [float32x2_t, uint32x2_t]
+      - [float32x4_t, uint32x4_t]
+      - [int32x2_t, float32x2_t]
+      - [int32x4_t, float32x4_t]
+      - [uint32x2_t, float32x2_t]
+      - [uint32x4_t, float32x4_t]
+    compose:
+      - FnCall: [transmute, [a]]
 
   - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
     doc: Vector reinterpret cast operation
@@ -8787,54 +8803,74 @@ intrinsics:
     types:
       # non-q
       - [float32x2_t, float16x4_t]
-      - [poly16x4_t, float16x4_t]
       - [poly8x8_t, float16x4_t]
       - [int8x8_t, float16x4_t]
-      - [int16x4_t, float16x4_t]
       - [int32x2_t, float16x4_t]
       - [int64x1_t, float16x4_t]
       - [uint8x8_t, float16x4_t]
-      - [uint16x4_t, float16x4_t]
       - [uint32x2_t, float16x4_t]
       - [uint64x1_t, float16x4_t]
       - [float16x4_t, float32x2_t]
-      - [float16x4_t, poly16x4_t]
       - [float16x4_t, poly8x8_t]
       - [float16x4_t, int8x8_t]
-      - [float16x4_t, int16x4_t]
       - [float16x4_t, int32x2_t]
       - [float16x4_t, int64x1_t]
       - [float16x4_t, uint8x8_t]
-      - [float16x4_t, uint16x4_t]
       - [float16x4_t, uint32x2_t]
       - [float16x4_t, uint64x1_t]
       # q
       - [float32x4_t, float16x8_t]
-      - [poly16x8_t, float16x8_t]
       - [poly8x16_t, float16x8_t]
       - [int8x16_t, float16x8_t]
-      - [int16x8_t, float16x8_t]
       - [int32x4_t, float16x8_t]
       - [int64x2_t, float16x8_t]
       - [uint8x16_t, float16x8_t]
-      - [uint16x8_t, float16x8_t]
       - [uint32x4_t, float16x8_t]
       - [uint64x2_t, float16x8_t]
       - [float16x8_t, float32x4_t]
-      - [float16x8_t, poly16x8_t]
       - [float16x8_t, poly8x16_t]
       - [float16x8_t, int8x16_t]
-      - [float16x8_t, int16x8_t]
       - [float16x8_t, int32x4_t]
       - [float16x8_t, int64x2_t]
       - [float16x8_t, uint8x16_t]
-      - [float16x8_t, uint16x8_t]
       - [float16x8_t, uint32x4_t]
       - [float16x8_t, uint64x2_t]
     compose:
       - FnCall: [transmute, [a]]
 
 
+  - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
+    doc: Vector reinterpret cast operation
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
+      - *target-not-arm64ec
+    safety: safe
+    big_endian_inverse: false
+    types:
+      # non-q
+      - [poly16x4_t, float16x4_t]
+      - [int16x4_t, float16x4_t]
+      - [uint16x4_t, float16x4_t]
+      - [float16x4_t, poly16x4_t]
+      - [float16x4_t, int16x4_t]
+      - [float16x4_t, uint16x4_t]
+      # q
+      - [poly16x8_t, float16x8_t]
+      - [int16x8_t, float16x8_t]
+      - [uint16x8_t, float16x8_t]
+      - [float16x8_t, poly16x8_t]
+      - [float16x8_t, int16x8_t]
+      - [float16x8_t, uint16x8_t]
+    compose:
+      - FnCall: [transmute, [a]]
+
+
   - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
     doc: Vector reinterpret cast operation
     arguments: ["a: {type[0]}"]

From 1d3d92207adcf82c0d9552c6413c42504fbf5592 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sun, 10 May 2026 04:43:15 +0530
Subject: [PATCH 42/78] Remove the heuristic for big-endian reversal, reversing
 only if requested specifically

---
 .../core_arch/src/aarch64/neon/generated.rs   |  223 --
 .../src/arm_shared/neon/generated.rs          | 1817 -----------------
 .../spec/neon/aarch64.spec.yml                |   36 +-
 .../spec/neon/arm_shared.spec.yml             |   20 +-
 .../crates/stdarch-gen-arm/src/intrinsic.rs   |   19 +-
 5 files changed, 11 insertions(+), 2104 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 7ffeb5e639bd4..0f9f2997b6988 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -9469,7 +9469,6 @@ pub unsafe fn vld2_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x2_t) ->
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld2r))]
@@ -9477,47 +9476,16 @@ pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t {
     transmute(vld2q_dup_s64(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t {
-    let mut ret_val: poly64x2x2_t = transmute(vld2q_dup_s64(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld2r))]
 pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t {
     transmute(vld2q_dup_s64(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t {
-    let mut ret_val: uint64x2x2_t = transmute(vld2q_dup_s64(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val
-}
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f64)"]
 #[doc = "## Safety"]
@@ -9672,7 +9640,6 @@ pub unsafe fn vld2q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x2_t) -> p
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld2))]
@@ -9680,21 +9647,6 @@ pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t {
     transmute(vld2q_s64(transmute(a)))
 }
 #[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t {
-    let mut ret_val: poly64x2x2_t = transmute(vld2q_s64(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val
-}
-#[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
@@ -9853,7 +9805,6 @@ pub unsafe fn vld3_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x3_t) ->
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld3r))]
@@ -9861,49 +9812,16 @@ pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t {
     transmute(vld3q_dup_s64(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t {
-    let mut ret_val: poly64x2x3_t = transmute(vld3q_dup_s64(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld3r))]
 pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t {
     transmute(vld3q_dup_s64(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t {
-    let mut ret_val: uint64x2x3_t = transmute(vld3q_dup_s64(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
-    ret_val
-}
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f64)"]
 #[doc = "## Safety"]
@@ -10061,7 +9979,6 @@ pub unsafe fn vld3q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x3_t) -> p
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld3))]
@@ -10069,22 +9986,6 @@ pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t {
     transmute(vld3q_s64(transmute(a)))
 }
 #[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t {
-    let mut ret_val: poly64x2x3_t = transmute(vld3q_s64(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
-    ret_val
-}
-#[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
@@ -10245,7 +10146,6 @@ pub unsafe fn vld4_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x4_t) ->
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -10253,51 +10153,16 @@ pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t {
     transmute(vld4q_dup_s64(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t {
-    let mut ret_val: poly64x2x4_t = transmute(vld4q_dup_s64(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t {
     transmute(vld4q_dup_s64(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t {
-    let mut ret_val: uint64x2x4_t = transmute(vld4q_dup_s64(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
-    ret_val
-}
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f64)"]
 #[doc = "## Safety"]
@@ -10458,7 +10323,6 @@ pub unsafe fn vld4q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x4_t) -> p
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ld4))]
@@ -10466,23 +10330,6 @@ pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t {
     transmute(vld4q_s64(transmute(a)))
 }
 #[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t {
-    let mut ret_val: poly64x2x4_t = transmute(vld4q_s64(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
-    ret_val
-}
-#[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
@@ -17516,7 +17363,6 @@ pub fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
 #[doc = "Reverse bit order"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(rbit))]
@@ -17524,23 +17370,8 @@ pub fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vrbit_s8(transmute(a))) }
 }
 #[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vrbit_s8(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Reverse bit order"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(rbit))]
@@ -17548,28 +17379,8 @@ pub fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vrbitq_s8(transmute(a))) }
 }
 #[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(vrbitq_s8(transmute(a)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Reverse bit order"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(rbit))]
@@ -17577,48 +17388,14 @@ pub fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
     unsafe { transmute(vrbit_s8(transmute(a))) }
 }
 #[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(vrbit_s8(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Reverse bit order"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(rbit))]
 pub fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
     unsafe { transmute(vrbitq_s8(transmute(a))) }
 }
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
-    unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(vrbitq_s8(transmute(a)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
 #[doc = "Reciprocal estimate."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f64)"]
 #[inline]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 968206fc5ef4f..f3df62af6a82f 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -7342,7 +7342,6 @@ pub fn vclzq_s32(a: int32x4_t) -> int32x4_t {
 #[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
@@ -7362,35 +7361,8 @@ pub fn vclz_u16(a: uint16x4_t) -> uint16x4_t {
     unsafe { transmute(vclz_s16(transmute(a))) }
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclz_u16(a: uint16x4_t) -> uint16x4_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(vclz_s16(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
@@ -7410,35 +7382,8 @@ pub fn vclzq_u16(a: uint16x8_t) -> uint16x8_t {
     unsafe { transmute(vclzq_s16(transmute(a))) }
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclzq_u16(a: uint16x8_t) -> uint16x8_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(vclzq_s16(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
@@ -7458,35 +7403,8 @@ pub fn vclz_u32(a: uint32x2_t) -> uint32x2_t {
     unsafe { transmute(vclz_s32(transmute(a))) }
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclz_u32(a: uint32x2_t) -> uint32x2_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint32x2_t = transmute(vclz_s32(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
@@ -7506,35 +7424,8 @@ pub fn vclzq_u32(a: uint32x4_t) -> uint32x4_t {
     unsafe { transmute(vclzq_s32(transmute(a))) }
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclzq_u32(a: uint32x4_t) -> uint32x4_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(vclzq_s32(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
@@ -7554,35 +7445,8 @@ pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vclz_s8(transmute(a))) }
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vclz_s8(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
@@ -7601,37 +7465,6 @@ pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
 pub fn vclzq_u8(a: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vclzq_s8(transmute(a))) }
 }
-#[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclzq_u8(a: uint8x16_t) -> uint8x16_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(vclzq_s8(transmute(a)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
 #[doc = "Population count per byte."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_s8)"]
 #[inline]
@@ -7677,7 +7510,6 @@ pub fn vcntq_s8(a: int8x16_t) -> int8x16_t {
 #[doc = "Population count per byte."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
@@ -7697,35 +7529,8 @@ pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vcnt_s8(transmute(a))) }
 }
 #[doc = "Population count per byte."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vcnt_s8(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Population count per byte."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
@@ -7745,40 +7550,8 @@ pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vcntq_s8(transmute(a))) }
 }
 #[doc = "Population count per byte."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(vcntq_s8(transmute(a)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Population count per byte."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
@@ -7798,35 +7571,8 @@ pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t {
     unsafe { transmute(vcnt_s8(transmute(a))) }
 }
 #[doc = "Population count per byte."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t {
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(vcnt_s8(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Population count per byte."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
@@ -7845,37 +7591,6 @@ pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t {
 pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
     unsafe { transmute(vcntq_s8(transmute(a))) }
 }
-#[doc = "Population count per byte."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
-    unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(vcntq_s8(transmute(a)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
 #[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"]
 #[inline]
@@ -15197,7 +14912,6 @@ pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t {
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[target_feature(enable = "neon,fp16")]
@@ -15211,30 +14925,10 @@ pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
     ))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
-    let ret_val: float16x4_t = transmute(vld1_v4f16(
-        ptr as *const i8,
-        crate::mem::align_of::<f16>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[target_feature(enable = "neon,fp16")]
@@ -15247,25 +14941,6 @@ pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
         crate::mem::align_of::<f16>() as i32,
     ))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
-    let ret_val: float16x8_t = transmute(vld1q_v8f16(
-        ptr as *const i8,
-        crate::mem::align_of::<f16>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-}
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"]
 #[doc = "## Safety"]
@@ -18621,7 +18296,6 @@ pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t {
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
@@ -18641,38 +18315,10 @@ pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
     transmute(vld2_dup_s8(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
-    let mut ret_val: uint8x8x2_t = transmute(vld2_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
@@ -18692,46 +18338,10 @@ pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
     transmute(vld2q_dup_s8(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
-    let mut ret_val: uint8x16x2_t = transmute(vld2q_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(
-        ret_val.0,
-        ret_val.0,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.1 = simd_shuffle!(
-        ret_val.1,
-        ret_val.1,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
@@ -18751,38 +18361,10 @@ pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
     transmute(vld2_dup_s16(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
-    let mut ret_val: uint16x4x2_t = transmute(vld2_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
@@ -18802,38 +18384,10 @@ pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
     transmute(vld2q_dup_s16(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
-    let mut ret_val: uint16x8x2_t = transmute(vld2q_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
@@ -18853,38 +18407,10 @@ pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
     transmute(vld2_dup_s32(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
-    let mut ret_val: uint32x2x2_t = transmute(vld2_dup_s32(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
@@ -18904,38 +18430,10 @@ pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
     transmute(vld2q_dup_s32(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
-    let mut ret_val: uint32x4x2_t = transmute(vld2q_dup_s32(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
@@ -18955,38 +18453,10 @@ pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
     transmute(vld2_dup_s8(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
-    let mut ret_val: poly8x8x2_t = transmute(vld2_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
@@ -19006,46 +18476,10 @@ pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
     transmute(vld2q_dup_s8(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
-    let mut ret_val: poly8x16x2_t = transmute(vld2q_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(
-        ret_val.0,
-        ret_val.0,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.1 = simd_shuffle!(
-        ret_val.1,
-        ret_val.1,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
@@ -19065,38 +18499,10 @@ pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
     transmute(vld2_dup_s16(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
-    let mut ret_val: poly16x4x2_t = transmute(vld2_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
@@ -19116,33 +18522,6 @@ pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
     transmute(vld2q_dup_s16(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
-    let mut ret_val: poly16x8x2_t = transmute(vld2q_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
@@ -20892,7 +20271,6 @@ pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t {
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
@@ -20912,39 +20290,10 @@ pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
     transmute(vld3_dup_s8(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
-    let mut ret_val: uint8x8x3_t = transmute(vld3_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
@@ -20964,51 +20313,10 @@ pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
     transmute(vld3q_dup_s8(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
-    let mut ret_val: uint8x16x3_t = transmute(vld3q_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(
-        ret_val.0,
-        ret_val.0,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.1 = simd_shuffle!(
-        ret_val.1,
-        ret_val.1,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.2 = simd_shuffle!(
-        ret_val.2,
-        ret_val.2,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
@@ -21028,39 +20336,10 @@ pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
     transmute(vld3_dup_s16(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
-    let mut ret_val: uint16x4x3_t = transmute(vld3_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
@@ -21080,39 +20359,10 @@ pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
     transmute(vld3q_dup_s16(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
-    let mut ret_val: uint16x8x3_t = transmute(vld3q_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
@@ -21132,39 +20382,10 @@ pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
     transmute(vld3_dup_s32(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
-    let mut ret_val: uint32x2x3_t = transmute(vld3_dup_s32(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
@@ -21184,39 +20405,10 @@ pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
     transmute(vld3q_dup_s32(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
-    let mut ret_val: uint32x4x3_t = transmute(vld3q_dup_s32(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
@@ -21236,39 +20428,10 @@ pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
     transmute(vld3_dup_s8(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
-    let mut ret_val: poly8x8x3_t = transmute(vld3_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
@@ -21288,51 +20451,10 @@ pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
     transmute(vld3q_dup_s8(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
-    let mut ret_val: poly8x16x3_t = transmute(vld3q_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(
-        ret_val.0,
-        ret_val.0,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.1 = simd_shuffle!(
-        ret_val.1,
-        ret_val.1,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.2 = simd_shuffle!(
-        ret_val.2,
-        ret_val.2,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
@@ -21352,39 +20474,10 @@ pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
     transmute(vld3_dup_s16(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
-    let mut ret_val: poly16x4x3_t = transmute(vld3_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
@@ -21403,34 +20496,6 @@ pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
 pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
     transmute(vld3q_dup_s16(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
-    let mut ret_val: poly16x8x3_t = transmute(vld3q_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
 #[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"]
 #[doc = "## Safety"]
@@ -23161,7 +22226,6 @@ pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t {
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
@@ -23181,40 +22245,10 @@ pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
     transmute(vld4_dup_s8(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
-    let mut ret_val: uint8x8x4_t = transmute(vld4_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
@@ -23234,56 +22268,10 @@ pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
     transmute(vld4q_dup_s8(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
-    let mut ret_val: uint8x16x4_t = transmute(vld4q_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(
-        ret_val.0,
-        ret_val.0,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.1 = simd_shuffle!(
-        ret_val.1,
-        ret_val.1,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.2 = simd_shuffle!(
-        ret_val.2,
-        ret_val.2,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.3 = simd_shuffle!(
-        ret_val.3,
-        ret_val.3,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
@@ -23303,40 +22291,10 @@ pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
     transmute(vld4_dup_s16(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
-    let mut ret_val: uint16x4x4_t = transmute(vld4_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
@@ -23356,40 +22314,10 @@ pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
     transmute(vld4q_dup_s16(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
-    let mut ret_val: uint16x8x4_t = transmute(vld4q_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
@@ -23409,40 +22337,10 @@ pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
     transmute(vld4_dup_s32(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
-    let mut ret_val: uint32x2x4_t = transmute(vld4_dup_s32(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
@@ -23462,40 +22360,10 @@ pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
     transmute(vld4q_dup_s32(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
-    let mut ret_val: uint32x4x4_t = transmute(vld4q_dup_s32(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
@@ -23515,40 +22383,10 @@ pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
     transmute(vld4_dup_s8(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
-    let mut ret_val: poly8x8x4_t = transmute(vld4_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
@@ -23568,56 +22406,10 @@ pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
     transmute(vld4q_dup_s8(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
-    let mut ret_val: poly8x16x4_t = transmute(vld4q_dup_s8(transmute(a)));
-    ret_val.0 = simd_shuffle!(
-        ret_val.0,
-        ret_val.0,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.1 = simd_shuffle!(
-        ret_val.1,
-        ret_val.1,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.2 = simd_shuffle!(
-        ret_val.2,
-        ret_val.2,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val.3 = simd_shuffle!(
-        ret_val.3,
-        ret_val.3,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    );
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
@@ -23637,40 +22429,10 @@ pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
     transmute(vld4_dup_s16(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
-    let mut ret_val: poly16x4x4_t = transmute(vld4_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
@@ -23689,35 +22451,6 @@ pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
 pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
     transmute(vld4q_dup_s16(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
-    let mut ret_val: poly16x8x4_t = transmute(vld4q_dup_s16(transmute(a)));
-    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
-    ret_val
-}
 #[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"]
 #[doc = "## Safety"]
@@ -38584,7 +37317,6 @@ pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
 #[doc = "Rounding Add returning High Narrow."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
@@ -38604,36 +37336,8 @@ pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
     unsafe { transmute(vraddhn_s16(transmute(a), transmute(b))) }
 }
 #[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vraddhn_s16(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Rounding Add returning High Narrow."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
@@ -38653,36 +37357,8 @@ pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
     unsafe { transmute(vraddhn_s32(transmute(a), transmute(b))) }
 }
 #[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(vraddhn_s32(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Rounding Add returning High Narrow."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
@@ -38701,33 +37377,6 @@ pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
 pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
     unsafe { transmute(vraddhn_s64(transmute(a), transmute(b))) }
 }
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
-    unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
-        let ret_val: uint32x2_t = transmute(vraddhn_s64(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
 #[doc = "Reciprocal estimate."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"]
 #[inline]
@@ -56131,7 +54780,6 @@ pub fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
@@ -56151,36 +54799,8 @@ pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
     unsafe { transmute(vrsubhn_s16(transmute(a), transmute(b))) }
 }
 #[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vrsubhn_s16(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
@@ -56200,36 +54820,8 @@ pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
     unsafe { transmute(vrsubhn_s32(transmute(a), transmute(b))) }
 }
 #[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(vrsubhn_s32(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
@@ -56248,33 +54840,6 @@ pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
 pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
     unsafe { transmute(vrsubhn_s64(transmute(a), transmute(b))) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
-    unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
-        let ret_val: uint32x2_t = transmute(vrsubhn_s64(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"]
 #[inline]
@@ -68016,7 +66581,6 @@ pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68026,26 +66590,8 @@ pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vtbl1(transmute(a), transmute(b))) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vtbl1(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68055,23 +66601,6 @@ pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vtbl1(transmute(a), transmute(b))) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(vtbl1(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -68100,7 +66629,6 @@ pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68110,28 +66638,8 @@ pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x8x2_t = a;
-    unsafe {
-        a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68141,25 +66649,6 @@ pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x8x2_t = a;
-    unsafe {
-        a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -68188,7 +66677,6 @@ pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68205,34 +66693,8 @@ pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x8x3_t = a;
-    unsafe {
-        a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vtbl3(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(b),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68249,31 +66711,6 @@ pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x8x3_t = a;
-    unsafe {
-        a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(vtbl3(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(b),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -68302,7 +66739,6 @@ pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68320,36 +66756,8 @@ pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x8x4_t = a;
-    unsafe {
-        a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.3 = simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vtbl4(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(a.3),
-            transmute(b),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68366,33 +66774,6 @@ pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
         ))
     }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x8x4_t = a;
-    unsafe {
-        a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-        a.3 = simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(vtbl4(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(a.3),
-            transmute(b),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1)"]
 #[inline]
@@ -68420,7 +66801,6 @@ pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -68429,26 +66809,8 @@ pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -68457,23 +66819,6 @@ pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2)"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
@@ -68500,7 +66845,6 @@ pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -68516,33 +66860,8 @@ pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x8x2_t = b;
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vtbx2(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -68558,30 +66877,6 @@ pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x8x2_t = b;
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(vtbx2(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3)"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
@@ -68608,7 +66903,6 @@ pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -68625,35 +66919,8 @@ pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x8x3_t = b;
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vtbx3(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -68670,32 +66937,6 @@ pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x8x3_t = b;
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(vtbx3(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4)"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
@@ -68767,7 +67008,6 @@ pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -68785,37 +67025,8 @@ pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x8x4_t = b;
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(vtbx4(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -68832,34 +67043,6 @@ pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
         ))
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x8x4_t = b;
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(vtbx4(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
 #[doc = "Transpose elements"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"]
 #[inline]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 0a466dda713e0..b47a8b1805e63 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -3595,7 +3595,6 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr: [*neon-stable]
     assert_instr: [ld2]
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -3942,7 +3941,6 @@ intrinsics:
     arguments: ["a: {type[0]}"]
     return_type: "{neon_type[1]}"
     attr: [*neon-stable]
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     assert_instr: [ld3]
@@ -4081,7 +4079,6 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr: [*neon-stable]
     assert_instr: [ld4]
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -4189,7 +4186,6 @@ intrinsics:
       - *neon-stable
     static_defs:
       - "const LANE: i32"
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -4239,7 +4235,6 @@ intrinsics:
       - *neon-stable
     static_defs:
       - "const LANE: i32"
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -8530,7 +8525,6 @@ intrinsics:
     attr: [*neon-stable]
     assert_instr: [nop]
     safety: safe
-    big_endian_inverse: false
     types:
       - [poly64x1_t, int64x1_t]
       - [poly64x1_t, uint64x1_t]
@@ -8562,6 +8556,7 @@ intrinsics:
     attr: [*neon-stable]
     assert_instr: [nop]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float64x1_t, int8x8_t]
       - [float64x1_t, int16x4_t]
@@ -8618,6 +8613,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float64x1_t, float16x4_t]
       - [float16x4_t, float64x1_t]
@@ -8979,6 +8975,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - ["u64", float64x1_t]
     compose:
@@ -12015,7 +12012,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [int8x8_t, 'int8x8x4_t', 'int8x16x2', 'int8x8', 'i8x8::splat(32)']
@@ -12046,7 +12042,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [uint8x8_t, 'uint8x8x4_t', 'uint8x8_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(32)']
@@ -12079,7 +12074,6 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
-    big_endian_inverse: false
     types:
       - [int8x8_t, 'int8x8_t', 'unsafe {{ transmute(b) }}']
       - [uint8x8_t, 'uint8x8_t', 'b']
@@ -12100,7 +12094,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - ['int8x8x2_t',  'int8x8_t',  'int8x8_t']
@@ -12117,7 +12110,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - ['uint8x8x2_t', 'uint8x8_t', 'uint8x8_t']
@@ -12136,7 +12128,6 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
-    big_endian_inverse: false
     types:
       - ['int8x8x3_t', 'int8x8_t', 'int8x8_t', 'int8x16x2']
     compose:
@@ -12160,7 +12151,6 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
-    big_endian_inverse: false
     types:
       - [uint8x8x3_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2']
       - [poly8x8x3_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2']
@@ -12184,7 +12174,6 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
-    big_endian_inverse: false
     types:
       - ['int8x8x4_t', 'int8x8_t', 'int8x8_t', 'int8x16x2']
     compose:
@@ -12207,7 +12196,6 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
-    big_endian_inverse: false
     types:
       - [uint8x8x4_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2']
       - [poly8x8x4_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2']
@@ -12250,7 +12238,6 @@ intrinsics:
       - [poly8x8_t, "poly8x16_t", uint8x8_t, "vqtbx1", "_p8"]
       - [uint8x16_t, "uint8x16_t", uint8x16_t, "vqtbx1q", "q_u8"]
       - [poly8x16_t, "poly8x16_t", uint8x16_t, "vqtbx1q", "q_p8"]
-    big_endian_inverse: false
     compose:
       - FnCall:
           - transmute
@@ -12267,7 +12254,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [int8x8_t, "int8x8_t", "int8x8", "i8x8::splat(8)"]
@@ -12292,7 +12278,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [uint8x8_t, "uint8x8_t", "uint8x8", "u8x8::splat(8)"]
@@ -12318,7 +12303,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [int8x8_t, 'int8x8x2_t', 'int8x8', 'i8x8::splat(16)']
@@ -12343,7 +12327,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [uint8x8_t, 'uint8x8x2_t', uint8x8_t, 'uint8x8', 'u8x8::splat(16)']
@@ -12369,7 +12352,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'int8x8', 'i8x8::splat(24)']
@@ -12400,7 +12382,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(24)']
@@ -12452,7 +12433,6 @@ intrinsics:
       - ['poly8x16_t', uint8x8_t, 'vqtbl1', 'poly8x8_t']
       - ['uint8x16_t', uint8x16_t, 'vqtbl1q', 'uint8x16_t']
       - ['poly8x16_t', uint8x16_t, 'vqtbl1q', 'poly8x16_t']
-    big_endian_inverse: false
     compose:
       - FnCall:
           - transmute
@@ -12482,7 +12462,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - ['uint8x16x2_t', uint8x8_t, 'vqtbl2', 'uint8x8_t']
@@ -12519,7 +12498,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [uint8x8_t, 'uint8x16x2_t', uint8x8_t, 'vqtbx2']
@@ -12543,7 +12521,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - ['int8x8_t', 'int8x16x3_t', uint8x8_t, 'vqtbl3']
@@ -12558,7 +12535,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - ['uint8x8_t', 'uint8x16x3_t', uint8x8_t, 'vqtbl3']
@@ -12596,7 +12572,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [uint8x8_t, 'uint8x16x3_t', uint8x8_t, 'vqtbx3']
@@ -12621,7 +12596,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - ['int8x16x4_t', uint8x8_t, 'vqtbl4', 'int8x8_t']
@@ -12636,7 +12610,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - ['uint8x16x4_t', uint8x8_t, 'vqtbl4', 'uint8x8_t']
@@ -12675,7 +12648,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - [uint8x8_t, 'uint8x16x4_t', uint8x8_t, 'vqtbx4']
@@ -12740,7 +12712,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - ["vqtbl3", int8x16_t, uint8x8_t, int8x8_t]
@@ -12760,7 +12731,6 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
-    big_endian_inverse: false
     safety: safe
     types:
       - ["vqtbl4", int8x16_t, uint8x8_t, int8x8_t]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index b6683080348a0..18b4b2afa7f5f 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -2685,7 +2685,6 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -2744,7 +2743,6 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -2982,7 +2980,6 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -3013,7 +3010,6 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -3110,7 +3106,6 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs:
       - "const LANE: i32"
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -4099,7 +4094,6 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     static_defs: ['const LANE: i32']
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -4130,7 +4124,6 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -4494,7 +4487,6 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -4616,7 +4608,6 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     static_defs: ["const LANE: i32"]
-    big_endian_inverse: false
     safety:
       unsafe: [neon]
     types:
@@ -8476,7 +8467,6 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
-    big_endian_inverse: false
     types:
       - [p128, int64x2_t]
       - [p128, uint64x2_t]
@@ -8504,6 +8494,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [poly64x1_t, int32x2_t]
       - [poly64x1_t, uint32x2_t]
@@ -8743,7 +8734,6 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
-    big_endian_inverse: false
     types:
       - [uint8x8_t, int8x8_t]
       - [poly8x8_t, int8x8_t]
@@ -8800,6 +8790,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       # non-q
       - [float32x2_t, float16x4_t]
@@ -8851,7 +8842,6 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
-    big_endian_inverse: false
     types:
       # non-q
       - [poly16x4_t, float16x4_t]
@@ -8883,6 +8873,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       - [poly64x1_t, float16x4_t]
       - [float16x4_t, poly64x1_t]
@@ -9196,6 +9187,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - ["u64", int8x8_t]
       - ["u64", int16x4_t]
@@ -9224,6 +9216,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       - ["u64", float16x4_t]
     compose:
@@ -9241,6 +9234,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - ["u64", poly64x1_t]
     compose:
@@ -13313,7 +13307,6 @@ intrinsics:
     doc: "Add pairwise."
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[0]}"
-    big_endian_inverse: false
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]]
@@ -14338,7 +14331,6 @@ intrinsics:
     doc: "Load one single-element structure and Replicate to all lanes (of one register)."
     arguments: ["ptr: {type[1]}"]
     return_type: "{neon_type[2]}"
-    big_endian_inverse: false
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']] }  ]]
diff --git a/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs b/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs
index dc467fd307810..72fb97fee1f08 100644
--- a/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs
+++ b/library/stdarch/crates/stdarch-gen-arm/src/intrinsic.rs
@@ -1059,23 +1059,8 @@ impl Intrinsic {
 
     /// Add a big endian implementation
     fn generate_big_endian(&self, variant: &mut Intrinsic) {
-        /* We can't always blindly reverse the bits only in certain conditions
-         * do we need a different order - thus this allows us to have the
-         * ability to do so without having to play codegolf with the yaml AST */
-        let should_reverse = {
-            if let Some(should_reverse) = variant.big_endian_inverse {
-                should_reverse
-            } else if variant.compose.len() == 1 {
-                match &variant.compose[0] {
-                    Expression::FnCall(fn_call) => fn_call.0.to_string() == "transmute",
-                    _ => false,
-                }
-            } else {
-                false
-            }
-        };
-
-        if !should_reverse {
+        // We only reverse if it was specifically requested
+        if !variant.big_endian_inverse.unwrap_or(false) {
             return;
         }
 

From 611b20079cb3ce953519e0e7cab8f4650fe01d6c Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sun, 10 May 2026 07:25:39 +0530
Subject: [PATCH 43/78] Revert "Change implementation of `vld1_dup`"

This reverts commit 5f676a53f584e6304985345791525fa06bb9130e.
---
 .../crates/core_arch/src/arm_shared/neon/generated.rs  |  8 ++++----
 .../stdarch-gen-arm/spec/neon/arm_shared.spec.yml      | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 476d8e78a21a2..6c300921d3575 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -14614,8 +14614,8 @@ pub fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
-    let x = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0)));
-    vdup_lane_f16::<0>(x)
+    let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0)));
+    simd_shuffle!(x, x, [0, 0, 0, 0])
 }
 #[doc = "Load one single-element structure and replicate to all lanes of one register"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"]
@@ -14633,8 +14633,8 @@ pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t {
-    let x = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0)));
-    vdupq_laneq_f16::<0>(x)
+    let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0)));
+    simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
 }
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 1f7e1f6987a5a..19c35a5895c9f 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -2835,7 +2835,7 @@ intrinsics:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall: [simd_insert!, [src, "LANE as u32", "*ptr"]]
 
-  - name: "vld1{neon_type[1].dup_nox}"
+  - name: "vld1{type[2]}_{neon_type[1]}"
     doc: "Load one single-element structure and replicate to all lanes of one register"
     arguments: ["ptr: {type[0]}"]
     return_type: "{neon_type[1]}"
@@ -2849,11 +2849,11 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - ["*const f16", float16x4_t, '_lane', 'f16x4']
-      - ["*const f16", float16x8_t, 'q_laneq', 'f16x8']
+      - ["*const f16", float16x4_t, '_dup', 'f16x4', "[0, 0, 0, 0]"]
+      - ["*const f16", float16x8_t, 'q_dup', 'f16x8', "[0, 0, 0, 0, 0, 0, 0, 0]"]
     compose:
-      - Let: [x, {FnCall: ["vld1{neon_type[1].lane_nox}", [ptr, {FnCall: [transmute, ["{type[3]}::splat(0.0)"]]}], [0]]}]
-      - FnCall: ['vdup{type[2]}_{neon_type[1]}', [x], [0]]
+      - Let: [x, "{neon_type[1]}", "vld1{neon_type[1].lane_nox}::<0>(ptr, transmute({type[3]}::splat(0.0)))"]
+      - FnCall: [simd_shuffle!, [x, x, "{type[4]}"]]
 
 
   - name: "vld2{neon_type[1].nox}"

From 46b8878c569cc4630d58950ed7d55d00cd1f84be Mon Sep 17 00:00:00 2001
From: Amanieu d'Antras <amanieu@gmail.com>
Date: Sun, 10 May 2026 18:56:44 +0100
Subject: [PATCH 44/78] Add davidtwco and adamgemmell as maintainers

---
 library/stdarch/triagebot.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/stdarch/triagebot.toml b/library/stdarch/triagebot.toml
index 621f39b2cbbf3..5b178f0cdf456 100644
--- a/library/stdarch/triagebot.toml
+++ b/library/stdarch/triagebot.toml
@@ -1,7 +1,7 @@
 [assign]
 
 [assign.owners]
-"*" = ["@Amanieu", "@folkertdev", "@sayantn"]
+"*" = ["@Amanieu", "@folkertdev", "@sayantn", "@davidtwco", "@adamgemmell"]
 
 [ping.windows]
 message = """\

From 2f90236a4dcbd8adedb14ad130934c3b95beb20e Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0@owo.li>
Date: Mon, 11 May 2026 16:58:25 +0800
Subject: [PATCH 45/78] Fix invalid suggestion for parenthesized break

---
 compiler/rustc_hir_typeck/src/loops.rs        |  8 +++++-
 .../break-in-unlabeled-block-parenthesized.rs |  9 +++++++
 ...ak-in-unlabeled-block-parenthesized.stderr | 27 +++++++++++++++++++
 3 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 tests/ui/parser/break-in-unlabeled-block-parenthesized.rs
 create mode 100644 tests/ui/parser/break-in-unlabeled-block-parenthesized.stderr

diff --git a/compiler/rustc_hir_typeck/src/loops.rs b/compiler/rustc_hir_typeck/src/loops.rs
index 21b408064fac5..d0009c36f97a4 100644
--- a/compiler/rustc_hir_typeck/src/loops.rs
+++ b/compiler/rustc_hir_typeck/src/loops.rs
@@ -270,7 +270,13 @@ impl<'hir> Visitor<'hir> for CheckLoopVisitor<'hir> {
                     }
                 }
 
-                let sp_lo = e.span.with_lo(e.span.lo() + BytePos("break".len() as u32));
+                let sp_lo = if let Ok(snippet) = self.tcx.sess.source_map().span_to_snippet(e.span)
+                    && let Some(break_pos) = snippet.find("break")
+                {
+                    e.span.with_lo(e.span.lo() + BytePos((break_pos + "break".len()) as u32))
+                } else {
+                    e.span.with_lo(e.span.lo() + BytePos("break".len() as u32))
+                };
                 let label_sp = match break_destination.label {
                     Some(label) => sp_lo.with_hi(label.ident.span.hi()),
                     None => sp_lo.shrink_to_lo(),
diff --git a/tests/ui/parser/break-in-unlabeled-block-parenthesized.rs b/tests/ui/parser/break-in-unlabeled-block-parenthesized.rs
new file mode 100644
index 0000000000000..787aaf0c18450
--- /dev/null
+++ b/tests/ui/parser/break-in-unlabeled-block-parenthesized.rs
@@ -0,0 +1,9 @@
+#![allow(unused_parens)]
+fn main() {
+    {
+        (break); //~ ERROR `break` outside of a loop or labeled block
+    };
+    {
+        ((break)); //~ ERROR `break` outside of a loop or labeled block
+    };
+}
diff --git a/tests/ui/parser/break-in-unlabeled-block-parenthesized.stderr b/tests/ui/parser/break-in-unlabeled-block-parenthesized.stderr
new file mode 100644
index 0000000000000..17d3c977ffc90
--- /dev/null
+++ b/tests/ui/parser/break-in-unlabeled-block-parenthesized.stderr
@@ -0,0 +1,27 @@
+error[E0268]: `break` outside of a loop or labeled block
+  --> $DIR/break-in-unlabeled-block-parenthesized.rs:4:9
+   |
+LL |         (break);
+   |         ^^^^^^^ cannot `break` outside of a loop or labeled block
+   |
+help: consider labeling this block to be able to break within it
+   |
+LL ~     'block: {
+LL ~         (break 'block);
+   |
+
+error[E0268]: `break` outside of a loop or labeled block
+  --> $DIR/break-in-unlabeled-block-parenthesized.rs:7:9
+   |
+LL |         ((break));
+   |         ^^^^^^^^^ cannot `break` outside of a loop or labeled block
+   |
+help: consider labeling this block to be able to break within it
+   |
+LL ~     'block: {
+LL ~         ((break 'block));
+   |
+
+error: aborting due to 2 previous errors
+
+For more information about this error, try `rustc --explain E0268`.

From 98b0121a9d2571663ea1609247c4bdaaf0d006b8 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui@loongson.cn>
Date: Wed, 6 May 2026 22:05:30 +0800
Subject: [PATCH 46/78] loongarch: Use `intrinsics::simd` for vabsd

---
 .../src/loongarch64/lasx/generated.rs         | 72 -------------------
 .../src/loongarch64/lasx/portable.rs          |  8 +++
 .../src/loongarch64/lsx/generated.rs          | 72 -------------------
 .../core_arch/src/loongarch64/lsx/portable.rs |  8 +++
 .../crates/core_arch/src/loongarch64/simd.rs  |  7 ++
 .../crates/stdarch-gen-loongarch/lasx.spec    |  8 +++
 .../crates/stdarch-gen-loongarch/lsx.spec     |  8 +++
 .../src/portable-intrinsics.txt               | 16 +++++
 8 files changed, 55 insertions(+), 144 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
index 5ef7132637468..f8667887c74c1 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
@@ -123,22 +123,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvavgr_wu(a: __v8u32, b: __v8u32) -> __v8u32;
     #[link_name = "llvm.loongarch.lasx.xvavgr.du"]
     fn __lasx_xvavgr_du(a: __v4u64, b: __v4u64) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.b"]
-    fn __lasx_xvabsd_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.h"]
-    fn __lasx_xvabsd_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.w"]
-    fn __lasx_xvabsd_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.d"]
-    fn __lasx_xvabsd_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.bu"]
-    fn __lasx_xvabsd_bu(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.hu"]
-    fn __lasx_xvabsd_hu(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.wu"]
-    fn __lasx_xvabsd_wu(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.du"]
-    fn __lasx_xvabsd_du(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvhaddw.h.b"]
     fn __lasx_xvhaddw_h_b(a: __v32i8, b: __v32i8) -> __v16i16;
     #[link_name = "llvm.loongarch.lasx.xvhaddw.w.h"]
@@ -1549,62 +1533,6 @@ pub fn lasx_xvavgr_du(a: m256i, b: m256i) -> m256i {
     unsafe { transmute(__lasx_xvavgr_du(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_du(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
index 366deda0c17c6..89535582101fc 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
@@ -152,6 +152,14 @@ impl_vvv!("lasx", lasx_xvadda_b, ls::simd_adda, m256i, i8x32);
 impl_vvv!("lasx", lasx_xvadda_h, ls::simd_adda, m256i, i16x16);
 impl_vvv!("lasx", lasx_xvadda_w, ls::simd_adda, m256i, i32x8);
 impl_vvv!("lasx", lasx_xvadda_d, ls::simd_adda, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvabsd_b, ls::simd_absd, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvabsd_h, ls::simd_absd, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvabsd_w, ls::simd_absd, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvabsd_d, ls::simd_absd, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvabsd_bu, ls::simd_absd, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvabsd_hu, ls::simd_absd, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvabsd_wu, ls::simd_absd, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvabsd_du, ls::simd_absd, m256i, u64x4);
 
 impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32);
 impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16);
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
index 1df46a43fedf9..41ced43454273 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
@@ -123,22 +123,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vavgr_wu(a: __v4u32, b: __v4u32) -> __v4u32;
     #[link_name = "llvm.loongarch.lsx.vavgr.du"]
     fn __lsx_vavgr_du(a: __v2u64, b: __v2u64) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vabsd.b"]
-    fn __lsx_vabsd_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vabsd.h"]
-    fn __lsx_vabsd_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vabsd.w"]
-    fn __lsx_vabsd_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vabsd.d"]
-    fn __lsx_vabsd_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vabsd.bu"]
-    fn __lsx_vabsd_bu(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vabsd.hu"]
-    fn __lsx_vabsd_hu(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vabsd.wu"]
-    fn __lsx_vabsd_wu(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vabsd.du"]
-    fn __lsx_vabsd_du(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vhaddw.h.b"]
     fn __lsx_vhaddw_h_b(a: __v16i8, b: __v16i8) -> __v8i16;
     #[link_name = "llvm.loongarch.lsx.vhaddw.w.h"]
@@ -1461,62 +1445,6 @@ pub fn lsx_vavgr_du(a: m128i, b: m128i) -> m128i {
     unsafe { transmute(__lsx_vavgr_du(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_du(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
index 55d349142ec86..16f00582357b0 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
@@ -152,6 +152,14 @@ impl_vvv!("lsx", lsx_vadda_b, ls::simd_adda, m128i, i8x16);
 impl_vvv!("lsx", lsx_vadda_h, ls::simd_adda, m128i, i16x8);
 impl_vvv!("lsx", lsx_vadda_w, ls::simd_adda, m128i, i32x4);
 impl_vvv!("lsx", lsx_vadda_d, ls::simd_adda, m128i, i64x2);
+impl_vvv!("lsx", lsx_vabsd_b, ls::simd_absd, m128i, i8x16);
+impl_vvv!("lsx", lsx_vabsd_h, ls::simd_absd, m128i, i16x8);
+impl_vvv!("lsx", lsx_vabsd_w, ls::simd_absd, m128i, i32x4);
+impl_vvv!("lsx", lsx_vabsd_d, ls::simd_absd, m128i, i64x2);
+impl_vvv!("lsx", lsx_vabsd_bu, ls::simd_absd, m128i, u8x16);
+impl_vvv!("lsx", lsx_vabsd_hu, ls::simd_absd, m128i, u16x8);
+impl_vvv!("lsx", lsx_vabsd_wu, ls::simd_absd, m128i, u32x4);
+impl_vvv!("lsx", lsx_vabsd_du, ls::simd_absd, m128i, u64x2);
 
 impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16);
 impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8);
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/simd.rs b/library/stdarch/crates/core_arch/src/loongarch64/simd.rs
index ccad4b8c48729..b4ec6881c36ab 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/simd.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/simd.rs
@@ -52,6 +52,13 @@ pub(crate) const unsafe fn simd_abs<T: Copy + const SimdExt>(a: T) -> T {
     is::simd_select(m, is::simd_neg(a), a)
 }
 
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const unsafe fn simd_absd<T: Copy>(a: T, b: T) -> T {
+    let m: T = is::simd_gt(a, b);
+    is::simd_select(m, is::simd_sub(a, b), is::simd_sub(b, a))
+}
+
 #[inline(always)]
 #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
 pub(crate) const unsafe fn simd_adda<T: Copy + const SimdExt>(a: T, b: T) -> T {
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
index 07f2ed49d3a68..ae69f194103dd 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
+++ b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
@@ -1124,41 +1124,49 @@ asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
 
 /// lasx_xvabsd_b
+impl = portable
 name = lasx_xvabsd_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvabsd_h
+impl = portable
 name = lasx_xvabsd_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvabsd_w
+impl = portable
 name = lasx_xvabsd_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvabsd_d
+impl = portable
 name = lasx_xvabsd_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvabsd_bu
+impl = portable
 name = lasx_xvabsd_bu
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvabsd_hu
+impl = portable
 name = lasx_xvabsd_hu
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvabsd_wu
+impl = portable
 name = lasx_xvabsd_wu
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvabsd_du
+impl = portable
 name = lasx_xvabsd_du
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
index 0b32dd83c72ff..48e98d59b602c 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
+++ b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
@@ -1124,41 +1124,49 @@ asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
 
 /// lsx_vabsd_b
+impl = portable
 name = lsx_vabsd_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vabsd_h
+impl = portable
 name = lsx_vabsd_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vabsd_w
+impl = portable
 name = lsx_vabsd_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vabsd_d
+impl = portable
 name = lsx_vabsd_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vabsd_bu
+impl = portable
 name = lsx_vabsd_bu
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vabsd_hu
+impl = portable
 name = lsx_vabsd_hu
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vabsd_wu
+impl = portable
 name = lsx_vabsd_wu
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vabsd_du
+impl = portable
 name = lsx_vabsd_du
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
index 6e3e5fe32e33f..e9b7b04f27235 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
+++ b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
@@ -223,6 +223,14 @@ lsx_vadda_b
 lsx_vadda_h
 lsx_vadda_w
 lsx_vadda_d
+lsx_vabsd_b
+lsx_vabsd_h
+lsx_vabsd_w
+lsx_vabsd_d
+lsx_vabsd_bu
+lsx_vabsd_hu
+lsx_vabsd_wu
+lsx_vabsd_du
 
 # LASX intrinsics
 lasx_xvsll_b
@@ -443,3 +451,11 @@ lasx_xvadda_b
 lasx_xvadda_h
 lasx_xvadda_w
 lasx_xvadda_d
+lasx_xvabsd_b
+lasx_xvabsd_h
+lasx_xvabsd_w
+lasx_xvabsd_d
+lasx_xvabsd_bu
+lasx_xvabsd_hu
+lasx_xvabsd_wu
+lasx_xvabsd_du

From f0a72ada819a89d73fd3154f9c224e7ebbc2c4ee Mon Sep 17 00:00:00 2001
From: Waffle Lapkin <waffle.lapkin@gmail.com>
Date: Wed, 8 Apr 2026 22:32:17 +0200
Subject: [PATCH 47/78] stabilize
 `feature(cfg_target_has_atomic_equal_alignment)`

---
 compiler/rustc_feature/src/accepted.rs        |  2 +
 compiler/rustc_feature/src/builtin_attrs.rs   |  5 --
 compiler/rustc_feature/src/unstable.rs        |  2 -
 library/core/src/lib.rs                       |  1 -
 tests/ui/check-cfg/well-known-values.rs       |  1 -
 tests/ui/check-cfg/well-known-values.stderr   | 58 ++++++++---------
 ...e-cfg-target-has-atomic-equal-alignment.rs | 14 -----
 ...g-target-has-atomic-equal-alignment.stderr | 63 -------------------
 8 files changed, 31 insertions(+), 115 deletions(-)
 delete mode 100644 tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.rs
 delete mode 100644 tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.stderr

diff --git a/compiler/rustc_feature/src/accepted.rs b/compiler/rustc_feature/src/accepted.rs
index 54a935d13c8c1..7b7724879e1f5 100644
--- a/compiler/rustc_feature/src/accepted.rs
+++ b/compiler/rustc_feature/src/accepted.rs
@@ -108,6 +108,8 @@ declare_features! (
     (accepted, cfg_target_abi, "1.78.0", Some(80970)),
     /// Allows `cfg(target_feature = "...")`.
     (accepted, cfg_target_feature, "1.27.0", Some(29717)),
+    /// Allows `cfg(target_has_atomic_equal_alignment = "...")`.
+    (accepted, cfg_target_has_atomic_equal_alignment, "CURRENT_RUSTC_VERSION", Some(93822)),
     /// Allows `cfg(target_vendor = "...")`.
     (accepted, cfg_target_vendor, "1.33.0", Some(29718)),
     /// Allows implementing `Clone` for closures where possible (RFC 2132).
diff --git a/compiler/rustc_feature/src/builtin_attrs.rs b/compiler/rustc_feature/src/builtin_attrs.rs
index 58bf12855ad6e..e10b9a77be851 100644
--- a/compiler/rustc_feature/src/builtin_attrs.rs
+++ b/compiler/rustc_feature/src/builtin_attrs.rs
@@ -20,11 +20,6 @@ const GATED_CFGS: &[GatedCfg] = &[
     (sym::ub_checks, sym::cfg_ub_checks, Features::cfg_ub_checks),
     (sym::contract_checks, sym::cfg_contract_checks, Features::cfg_contract_checks),
     (sym::target_thread_local, sym::cfg_target_thread_local, Features::cfg_target_thread_local),
-    (
-        sym::target_has_atomic_equal_alignment,
-        sym::cfg_target_has_atomic_equal_alignment,
-        Features::cfg_target_has_atomic_equal_alignment,
-    ),
     (
         sym::target_has_atomic_load_store,
         sym::cfg_target_has_atomic,
diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs
index 0d44d3c0d4d79..11fcdaf163e74 100644
--- a/compiler/rustc_feature/src/unstable.rs
+++ b/compiler/rustc_feature/src/unstable.rs
@@ -442,8 +442,6 @@ declare_features! (
     (unstable, cfg_target_compact, "1.63.0", Some(96901)),
     /// Allows `cfg(target_has_atomic_load_store = "...")`.
     (unstable, cfg_target_has_atomic, "1.60.0", Some(94039)),
-    /// Allows `cfg(target_has_atomic_equal_alignment = "...")`.
-    (unstable, cfg_target_has_atomic_equal_alignment, "1.60.0", Some(93822)),
     /// Allows `cfg(target_object_format = "...")`.
     (unstable, cfg_target_object_format, "CURRENT_RUSTC_VERSION", Some(152586)),
     /// Allows `cfg(target_thread_local)`.
diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
index 57ce51bb8c0ed..01bfd18ab89d6 100644
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@@ -123,7 +123,6 @@
 #![feature(auto_traits)]
 #![feature(cfg_sanitize)]
 #![feature(cfg_target_has_atomic)]
-#![feature(cfg_target_has_atomic_equal_alignment)]
 #![feature(cfg_ub_checks)]
 #![feature(const_closures)]
 #![feature(const_precise_live_drops)]
diff --git a/tests/ui/check-cfg/well-known-values.rs b/tests/ui/check-cfg/well-known-values.rs
index f48438d142467..5cb5a88745996 100644
--- a/tests/ui/check-cfg/well-known-values.rs
+++ b/tests/ui/check-cfg/well-known-values.rs
@@ -13,7 +13,6 @@
 #![feature(cfg_relocation_model)]
 #![feature(cfg_sanitize)]
 #![feature(cfg_target_has_atomic)]
-#![feature(cfg_target_has_atomic_equal_alignment)]
 #![feature(cfg_target_thread_local)]
 #![feature(cfg_target_object_format)]
 #![feature(cfg_ub_checks)]
diff --git a/tests/ui/check-cfg/well-known-values.stderr b/tests/ui/check-cfg/well-known-values.stderr
index dd1b696b76cb2..a35d8b025b8e2 100644
--- a/tests/ui/check-cfg/well-known-values.stderr
+++ b/tests/ui/check-cfg/well-known-values.stderr
@@ -1,5 +1,5 @@
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:30:5
+  --> $DIR/well-known-values.rs:29:5
    |
 LL |     clippy = "_UNEXPECTED_VALUE",
    |     ^^^^^^----------------------
@@ -11,7 +11,7 @@ LL |     clippy = "_UNEXPECTED_VALUE",
    = note: `#[warn(unexpected_cfgs)]` on by default
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:32:5
+  --> $DIR/well-known-values.rs:31:5
    |
 LL |     debug_assertions = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^----------------------
@@ -22,7 +22,7 @@ LL |     debug_assertions = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:34:5
+  --> $DIR/well-known-values.rs:33:5
    |
 LL |     doc = "_UNEXPECTED_VALUE",
    |     ^^^----------------------
@@ -33,7 +33,7 @@ LL |     doc = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:36:5
+  --> $DIR/well-known-values.rs:35:5
    |
 LL |     doctest = "_UNEXPECTED_VALUE",
    |     ^^^^^^^----------------------
@@ -44,7 +44,7 @@ LL |     doctest = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:38:5
+  --> $DIR/well-known-values.rs:37:5
    |
 LL |     fmt_debug = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -53,7 +53,7 @@ LL |     fmt_debug = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:40:5
+  --> $DIR/well-known-values.rs:39:5
    |
 LL |     miri = "_UNEXPECTED_VALUE",
    |     ^^^^----------------------
@@ -64,7 +64,7 @@ LL |     miri = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:42:5
+  --> $DIR/well-known-values.rs:41:5
    |
 LL |     overflow_checks = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^----------------------
@@ -75,7 +75,7 @@ LL |     overflow_checks = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:44:5
+  --> $DIR/well-known-values.rs:43:5
    |
 LL |     panic = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -84,7 +84,7 @@ LL |     panic = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:46:5
+  --> $DIR/well-known-values.rs:45:5
    |
 LL |     proc_macro = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^----------------------
@@ -95,7 +95,7 @@ LL |     proc_macro = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:48:5
+  --> $DIR/well-known-values.rs:47:5
    |
 LL |     relocation_model = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -104,7 +104,7 @@ LL |     relocation_model = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:50:5
+  --> $DIR/well-known-values.rs:49:5
    |
 LL |     rustfmt = "_UNEXPECTED_VALUE",
    |     ^^^^^^^----------------------
@@ -115,7 +115,7 @@ LL |     rustfmt = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:52:5
+  --> $DIR/well-known-values.rs:51:5
    |
 LL |     sanitize = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -124,7 +124,7 @@ LL |     sanitize = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:54:5
+  --> $DIR/well-known-values.rs:53:5
    |
 LL |     target_abi = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -133,7 +133,7 @@ LL |     target_abi = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:56:5
+  --> $DIR/well-known-values.rs:55:5
    |
 LL |     target_arch = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -142,7 +142,7 @@ LL |     target_arch = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:58:5
+  --> $DIR/well-known-values.rs:57:5
    |
 LL |     target_endian = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -151,7 +151,7 @@ LL |     target_endian = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:60:5
+  --> $DIR/well-known-values.rs:59:5
    |
 LL |     target_env = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -160,7 +160,7 @@ LL |     target_env = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:62:5
+  --> $DIR/well-known-values.rs:61:5
    |
 LL |     target_family = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -169,7 +169,7 @@ LL |     target_family = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:66:5
+  --> $DIR/well-known-values.rs:65:5
    |
 LL |     target_has_atomic = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -178,7 +178,7 @@ LL |     target_has_atomic = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:68:5
+  --> $DIR/well-known-values.rs:67:5
    |
 LL |     target_has_atomic_equal_alignment = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -187,7 +187,7 @@ LL |     target_has_atomic_equal_alignment = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:70:5
+  --> $DIR/well-known-values.rs:69:5
    |
 LL |     target_has_atomic_load_store = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -196,7 +196,7 @@ LL |     target_has_atomic_load_store = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:72:5
+  --> $DIR/well-known-values.rs:71:5
    |
 LL |     target_object_format = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -205,7 +205,7 @@ LL |     target_object_format = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:74:5
+  --> $DIR/well-known-values.rs:73:5
    |
 LL |     target_os = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -214,7 +214,7 @@ LL |     target_os = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:76:5
+  --> $DIR/well-known-values.rs:75:5
    |
 LL |     target_pointer_width = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -223,7 +223,7 @@ LL |     target_pointer_width = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:78:5
+  --> $DIR/well-known-values.rs:77:5
    |
 LL |     target_thread_local = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^----------------------
@@ -234,7 +234,7 @@ LL |     target_thread_local = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:80:5
+  --> $DIR/well-known-values.rs:79:5
    |
 LL |     target_vendor = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -243,7 +243,7 @@ LL |     target_vendor = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:82:5
+  --> $DIR/well-known-values.rs:81:5
    |
 LL |     ub_checks = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^----------------------
@@ -254,7 +254,7 @@ LL |     ub_checks = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:84:5
+  --> $DIR/well-known-values.rs:83:5
    |
 LL |     unix = "_UNEXPECTED_VALUE",
    |     ^^^^----------------------
@@ -265,7 +265,7 @@ LL |     unix = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
-  --> $DIR/well-known-values.rs:86:5
+  --> $DIR/well-known-values.rs:85:5
    |
 LL |     windows = "_UNEXPECTED_VALUE",
    |     ^^^^^^^----------------------
@@ -276,7 +276,7 @@ LL |     windows = "_UNEXPECTED_VALUE",
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `linuz`
-  --> $DIR/well-known-values.rs:92:7
+  --> $DIR/well-known-values.rs:91:7
    |
 LL | #[cfg(target_os = "linuz")] // testing that we suggest `linux`
    |       ^^^^^^^^^^^^-------
diff --git a/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.rs b/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.rs
deleted file mode 100644
index 3d692a0700197..0000000000000
--- a/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.rs
+++ /dev/null
@@ -1,14 +0,0 @@
-fn main() {
-    cfg!(target_has_atomic_equal_alignment = "8");
-    //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-    cfg!(target_has_atomic_equal_alignment = "16");
-    //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-    cfg!(target_has_atomic_equal_alignment = "32");
-    //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-    cfg!(target_has_atomic_equal_alignment = "64");
-    //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-    cfg!(target_has_atomic_equal_alignment = "128");
-    //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-    cfg!(target_has_atomic_equal_alignment = "ptr");
-    //~^ ERROR `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-}
diff --git a/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.stderr b/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.stderr
deleted file mode 100644
index 8d5d232ccc485..0000000000000
--- a/tests/ui/feature-gates/feature-gate-cfg-target-has-atomic-equal-alignment.stderr
+++ /dev/null
@@ -1,63 +0,0 @@
-error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-  --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:2:10
-   |
-LL |     cfg!(target_has_atomic_equal_alignment = "8");
-   |          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-   |
-   = note: see issue #93822 <https://github.com/rust-lang/rust/issues/93822> for more information
-   = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable
-   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
-
-error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-  --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:4:10
-   |
-LL |     cfg!(target_has_atomic_equal_alignment = "16");
-   |          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-   |
-   = note: see issue #93822 <https://github.com/rust-lang/rust/issues/93822> for more information
-   = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable
-   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
-
-error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-  --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:6:10
-   |
-LL |     cfg!(target_has_atomic_equal_alignment = "32");
-   |          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-   |
-   = note: see issue #93822 <https://github.com/rust-lang/rust/issues/93822> for more information
-   = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable
-   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
-
-error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-  --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:8:10
-   |
-LL |     cfg!(target_has_atomic_equal_alignment = "64");
-   |          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-   |
-   = note: see issue #93822 <https://github.com/rust-lang/rust/issues/93822> for more information
-   = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable
-   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
-
-error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-  --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:10:10
-   |
-LL |     cfg!(target_has_atomic_equal_alignment = "128");
-   |          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-   |
-   = note: see issue #93822 <https://github.com/rust-lang/rust/issues/93822> for more information
-   = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable
-   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
-
-error[E0658]: `cfg(target_has_atomic_equal_alignment)` is experimental and subject to change
-  --> $DIR/feature-gate-cfg-target-has-atomic-equal-alignment.rs:12:10
-   |
-LL |     cfg!(target_has_atomic_equal_alignment = "ptr");
-   |          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-   |
-   = note: see issue #93822 <https://github.com/rust-lang/rust/issues/93822> for more information
-   = help: add `#![feature(cfg_target_has_atomic_equal_alignment)]` to the crate attributes to enable
-   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
-
-error: aborting due to 6 previous errors
-
-For more information about this error, try `rustc --explain E0658`.

From 665ec41c121f297dc0af12f19332454bd799ba90 Mon Sep 17 00:00:00 2001
From: Waffle Lapkin <waffle.lapkin@gmail.com>
Date: Mon, 11 May 2026 16:22:22 +0200
Subject: [PATCH 48/78] rename
 `cfg(target_has_atomic_{equal=>primitive}_alignment)`

---
 compiler/rustc_feature/src/accepted.rs        |  2 +-
 compiler/rustc_session/src/config/cfg.rs      |  6 ++--
 compiler/rustc_span/src/symbol.rs             |  2 +-
 library/core/src/lib.rs                       | 10 +++---
 library/core/src/sync/atomic.rs               | 32 +++++++++----------
 src/doc/rustc/src/check-cfg.md                |  2 +-
 tests/ui/cfg/disallowed-cli-cfgs.rs           |  6 ++--
 ....target_has_atomic_equal_alignment_.stderr |  4 +--
 ...get_has_atomic_primitive_alignment_.stderr |  8 +++++
 tests/ui/check-cfg/well-known-names.stderr    |  2 +-
 tests/ui/check-cfg/well-known-values.rs       |  4 +--
 tests/ui/check-cfg/well-known-values.stderr   | 12 +++----
 .../atomic-from-mut-not-available.rs          | 12 +++----
 13 files changed, 56 insertions(+), 46 deletions(-)
 create mode 100644 tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_primitive_alignment_.stderr

diff --git a/compiler/rustc_feature/src/accepted.rs b/compiler/rustc_feature/src/accepted.rs
index 7b7724879e1f5..eaf8eddfff3a5 100644
--- a/compiler/rustc_feature/src/accepted.rs
+++ b/compiler/rustc_feature/src/accepted.rs
@@ -108,7 +108,7 @@ declare_features! (
     (accepted, cfg_target_abi, "1.78.0", Some(80970)),
     /// Allows `cfg(target_feature = "...")`.
     (accepted, cfg_target_feature, "1.27.0", Some(29717)),
-    /// Allows `cfg(target_has_atomic_equal_alignment = "...")`.
+    /// Allows `cfg(target_has_atomic_primitive_alignment = "...")`.
     (accepted, cfg_target_has_atomic_equal_alignment, "CURRENT_RUSTC_VERSION", Some(93822)),
     /// Allows `cfg(target_vendor = "...")`.
     (accepted, cfg_target_vendor, "1.33.0", Some(29718)),
diff --git a/compiler/rustc_session/src/config/cfg.rs b/compiler/rustc_session/src/config/cfg.rs
index 9c76f4f3db92f..74641fd629bc9 100644
--- a/compiler/rustc_session/src/config/cfg.rs
+++ b/compiler/rustc_session/src/config/cfg.rs
@@ -149,7 +149,7 @@ pub(crate) fn disallow_cfgs(sess: &Session, user_cfgs: &Cfg) {
             | (sym::target_pointer_width, Some(_))
             | (sym::target_vendor, None | Some(_))
             | (sym::target_has_atomic, Some(_))
-            | (sym::target_has_atomic_equal_alignment, Some(_))
+            | (sym::target_has_atomic_primitive_alignment, Some(_))
             | (sym::target_has_atomic_load_store, Some(_))
             | (sym::target_has_reliable_f16, None | Some(_))
             | (sym::target_has_reliable_f16_math, None | Some(_))
@@ -293,7 +293,7 @@ pub(crate) fn default_configuration(sess: &Session) -> Cfg {
                     ins_sym!(sym::target_has_atomic, sym);
                 }
                 if align.bits() == i {
-                    ins_sym!(sym::target_has_atomic_equal_alignment, sym);
+                    ins_sym!(sym::target_has_atomic_primitive_alignment, sym);
                 }
                 ins_sym!(sym::target_has_atomic_load_store, sym);
             };
@@ -487,7 +487,7 @@ impl CheckCfg {
         ];
         for sym in [
             sym::target_has_atomic,
-            sym::target_has_atomic_equal_alignment,
+            sym::target_has_atomic_primitive_alignment,
             sym::target_has_atomic_load_store,
         ] {
             ins!(sym, no_values).extend(atomic_values);
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 7f00cf203d827..b6703be8f4604 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -2021,8 +2021,8 @@ symbols! {
         target_feature_11,
         target_feature_inline_always,
         target_has_atomic,
-        target_has_atomic_equal_alignment,
         target_has_atomic_load_store,
+        target_has_atomic_primitive_alignment,
         target_has_reliable_f16,
         target_has_reliable_f16_math,
         target_has_reliable_f128,
diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
index 01bfd18ab89d6..3e18f87e20537 100644
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@@ -61,11 +61,11 @@
     target_has_atomic = "32",
     target_has_atomic = "64",
     target_has_atomic = "ptr",
-    target_has_atomic_equal_alignment = "8",
-    target_has_atomic_equal_alignment = "16",
-    target_has_atomic_equal_alignment = "32",
-    target_has_atomic_equal_alignment = "64",
-    target_has_atomic_equal_alignment = "ptr",
+    target_has_atomic_primitive_alignment = "8",
+    target_has_atomic_primitive_alignment = "16",
+    target_has_atomic_primitive_alignment = "32",
+    target_has_atomic_primitive_alignment = "64",
+    target_has_atomic_primitive_alignment = "ptr",
     target_has_atomic_load_store = "8",
     target_has_atomic_load_store = "16",
     target_has_atomic_load_store = "32",
diff --git a/library/core/src/sync/atomic.rs b/library/core/src/sync/atomic.rs
index 004772267da74..8a9a0b52ffec3 100644
--- a/library/core/src/sync/atomic.rs
+++ b/library/core/src/sync/atomic.rs
@@ -622,7 +622,7 @@ impl AtomicBool {
     /// assert_eq!(some_bool, false);
     /// ```
     #[inline]
-    #[cfg(target_has_atomic_equal_alignment = "8")]
+    #[cfg(target_has_atomic_primitive_alignment = "8")]
     #[unstable(feature = "atomic_from_mut", issue = "76314")]
     pub fn from_mut(v: &mut bool) -> &mut Self {
         // SAFETY: the mutable reference guarantees unique ownership, and
@@ -682,7 +682,7 @@ impl AtomicBool {
     /// assert_eq!(some_bools, [true; 10]);
     /// ```
     #[inline]
-    #[cfg(target_has_atomic_equal_alignment = "8")]
+    #[cfg(target_has_atomic_primitive_alignment = "8")]
     #[unstable(feature = "atomic_from_mut", issue = "76314")]
     pub fn from_mut_slice(v: &mut [bool]) -> &mut [Self] {
         // SAFETY: the mutable reference guarantees unique ownership, and
@@ -1593,7 +1593,7 @@ impl<T> AtomicPtr<T> {
     /// assert_eq!(unsafe { *some_ptr }, 456);
     /// ```
     #[inline]
-    #[cfg(target_has_atomic_equal_alignment = "ptr")]
+    #[cfg(target_has_atomic_primitive_alignment = "ptr")]
     #[unstable(feature = "atomic_from_mut", issue = "76314")]
     pub fn from_mut(v: &mut *mut T) -> &mut Self {
         let [] = [(); align_of::<AtomicPtr<()>>() - align_of::<*mut ()>()];
@@ -1672,7 +1672,7 @@ impl<T> AtomicPtr<T> {
     /// }
     /// ```
     #[inline]
-    #[cfg(target_has_atomic_equal_alignment = "ptr")]
+    #[cfg(target_has_atomic_primitive_alignment = "ptr")]
     #[unstable(feature = "atomic_from_mut", issue = "76314")]
     pub fn from_mut_slice(v: &mut [*mut T]) -> &mut [Self] {
         // SAFETY:
@@ -3621,7 +3621,7 @@ macro_rules! atomic_int {
 #[cfg(target_has_atomic_load_store = "8")]
 atomic_int! {
     cfg(target_has_atomic = "8"),
-    cfg(target_has_atomic_equal_alignment = "8"),
+    cfg(target_has_atomic_primitive_alignment = "8"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
@@ -3639,7 +3639,7 @@ atomic_int! {
 #[cfg(target_has_atomic_load_store = "8")]
 atomic_int! {
     cfg(target_has_atomic = "8"),
-    cfg(target_has_atomic_equal_alignment = "8"),
+    cfg(target_has_atomic_primitive_alignment = "8"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
@@ -3657,7 +3657,7 @@ atomic_int! {
 #[cfg(target_has_atomic_load_store = "16")]
 atomic_int! {
     cfg(target_has_atomic = "16"),
-    cfg(target_has_atomic_equal_alignment = "16"),
+    cfg(target_has_atomic_primitive_alignment = "16"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
@@ -3675,7 +3675,7 @@ atomic_int! {
 #[cfg(target_has_atomic_load_store = "16")]
 atomic_int! {
     cfg(target_has_atomic = "16"),
-    cfg(target_has_atomic_equal_alignment = "16"),
+    cfg(target_has_atomic_primitive_alignment = "16"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
@@ -3693,7 +3693,7 @@ atomic_int! {
 #[cfg(target_has_atomic_load_store = "32")]
 atomic_int! {
     cfg(target_has_atomic = "32"),
-    cfg(target_has_atomic_equal_alignment = "32"),
+    cfg(target_has_atomic_primitive_alignment = "32"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
@@ -3711,7 +3711,7 @@ atomic_int! {
 #[cfg(target_has_atomic_load_store = "32")]
 atomic_int! {
     cfg(target_has_atomic = "32"),
-    cfg(target_has_atomic_equal_alignment = "32"),
+    cfg(target_has_atomic_primitive_alignment = "32"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
@@ -3729,7 +3729,7 @@ atomic_int! {
 #[cfg(target_has_atomic_load_store = "64")]
 atomic_int! {
     cfg(target_has_atomic = "64"),
-    cfg(target_has_atomic_equal_alignment = "64"),
+    cfg(target_has_atomic_primitive_alignment = "64"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
@@ -3747,7 +3747,7 @@ atomic_int! {
 #[cfg(target_has_atomic_load_store = "64")]
 atomic_int! {
     cfg(target_has_atomic = "64"),
-    cfg(target_has_atomic_equal_alignment = "64"),
+    cfg(target_has_atomic_primitive_alignment = "64"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
     stable(feature = "integer_atomics_stable", since = "1.34.0"),
@@ -3765,7 +3765,7 @@ atomic_int! {
 #[cfg(target_has_atomic_load_store = "128")]
 atomic_int! {
     cfg(target_has_atomic = "128"),
-    cfg(target_has_atomic_equal_alignment = "128"),
+    cfg(target_has_atomic_primitive_alignment = "128"),
     unstable(feature = "integer_atomics", issue = "99069"),
     unstable(feature = "integer_atomics", issue = "99069"),
     unstable(feature = "integer_atomics", issue = "99069"),
@@ -3783,7 +3783,7 @@ atomic_int! {
 #[cfg(target_has_atomic_load_store = "128")]
 atomic_int! {
     cfg(target_has_atomic = "128"),
-    cfg(target_has_atomic_equal_alignment = "128"),
+    cfg(target_has_atomic_primitive_alignment = "128"),
     unstable(feature = "integer_atomics", issue = "99069"),
     unstable(feature = "integer_atomics", issue = "99069"),
     unstable(feature = "integer_atomics", issue = "99069"),
@@ -3805,7 +3805,7 @@ macro_rules! atomic_int_ptr_sized {
         #[cfg(target_pointer_width = $target_pointer_width)]
         atomic_int! {
             cfg(target_has_atomic = "ptr"),
-            cfg(target_has_atomic_equal_alignment = "ptr"),
+            cfg(target_has_atomic_primitive_alignment = "ptr"),
             stable(feature = "rust1", since = "1.0.0"),
             stable(feature = "extended_compare_and_swap", since = "1.10.0"),
             stable(feature = "atomic_debug", since = "1.3.0"),
@@ -3823,7 +3823,7 @@ macro_rules! atomic_int_ptr_sized {
         #[cfg(target_pointer_width = $target_pointer_width)]
         atomic_int! {
             cfg(target_has_atomic = "ptr"),
-            cfg(target_has_atomic_equal_alignment = "ptr"),
+            cfg(target_has_atomic_primitive_alignment = "ptr"),
             stable(feature = "rust1", since = "1.0.0"),
             stable(feature = "extended_compare_and_swap", since = "1.10.0"),
             stable(feature = "atomic_debug", since = "1.3.0"),
diff --git a/src/doc/rustc/src/check-cfg.md b/src/doc/rustc/src/check-cfg.md
index dfe036bf1bb19..a3e83d8e1d298 100644
--- a/src/doc/rustc/src/check-cfg.md
+++ b/src/doc/rustc/src/check-cfg.md
@@ -124,7 +124,7 @@ As of `2025-01-02T`, the list of known names is as follows:
  - `target_family`
  - `target_feature`
  - `target_has_atomic`
- - `target_has_atomic_equal_alignment`
+ - `target_has_atomic_primitive_alignment`
  - `target_has_atomic_load_store`
  - `target_os`
  - `target_pointer_width`
diff --git a/tests/ui/cfg/disallowed-cli-cfgs.rs b/tests/ui/cfg/disallowed-cli-cfgs.rs
index 1ce65a7d657e6..81f3d9313ef1f 100644
--- a/tests/ui/cfg/disallowed-cli-cfgs.rs
+++ b/tests/ui/cfg/disallowed-cli-cfgs.rs
@@ -1,10 +1,12 @@
+// ignore-tidy-linelength (target_has_atomic_primitive_alignment below overflows the linelength limit and  @ [revision]compile-flags isn't detected by tidy as something to ignore)
+
 //@ check-fail
 //@ revisions: overflow_checks_ debug_assertions_ ub_checks_ sanitize_
 //@ revisions: sanitizer_cfi_generalize_pointers_ sanitizer_cfi_normalize_integers_
 //@ revisions: proc_macro_ panic_ target_feature_ unix_ windows_ target_abi_
 //@ revisions: target_arch_ target_endian_ target_env_ target_family_ target_os_
 //@ revisions: target_object_format_ target_pointer_width_ target_vendor_
-//@ revisions: target_has_atomic_ target_has_atomic_equal_alignment_
+//@ revisions: target_has_atomic_ target_has_atomic_primitive_alignment_
 //@ revisions: target_has_atomic_load_store_ target_thread_local_ relocation_model_
 //@ revisions: fmt_debug_
 //@ revisions: emscripten_wasm_eh_
@@ -31,7 +33,7 @@
 //@ [target_pointer_width_]compile-flags: --cfg target_pointer_width="32"
 //@ [target_vendor_]compile-flags: --cfg target_vendor
 //@ [target_has_atomic_]compile-flags: --cfg target_has_atomic="32"
-//@ [target_has_atomic_equal_alignment_]compile-flags: --cfg target_has_atomic_equal_alignment="32"
+//@ [target_has_atomic_primitive_alignment_]compile-flags: --cfg target_has_atomic_primitive_alignment="32"
 //@ [target_has_atomic_load_store_]compile-flags: --cfg target_has_atomic_load_store="32"
 //@ [target_thread_local_]compile-flags: --cfg target_thread_local
 //@ [relocation_model_]compile-flags: --cfg relocation_model="a"
diff --git a/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_equal_alignment_.stderr b/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_equal_alignment_.stderr
index 096490a03f610..3cdc06822700c 100644
--- a/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_equal_alignment_.stderr
+++ b/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_equal_alignment_.stderr
@@ -1,6 +1,6 @@
-error: unexpected `--cfg target_has_atomic_equal_alignment="32"` flag
+error: unexpected `--cfg target_has_atomic_primitive_alignment="32"` flag
    |
-   = note: config `target_has_atomic_equal_alignment` is only supposed to be controlled by `--target`
+   = note: config `target_has_atomic_primitive_alignment` is only supposed to be controlled by `--target`
    = note: manually setting a built-in cfg can and does create incoherent behaviors
    = note: `#[deny(explicit_builtin_cfgs_in_flags)]` on by default
 
diff --git a/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_primitive_alignment_.stderr b/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_primitive_alignment_.stderr
new file mode 100644
index 0000000000000..3cdc06822700c
--- /dev/null
+++ b/tests/ui/cfg/disallowed-cli-cfgs.target_has_atomic_primitive_alignment_.stderr
@@ -0,0 +1,8 @@
+error: unexpected `--cfg target_has_atomic_primitive_alignment="32"` flag
+   |
+   = note: config `target_has_atomic_primitive_alignment` is only supposed to be controlled by `--target`
+   = note: manually setting a built-in cfg can and does create incoherent behaviors
+   = note: `#[deny(explicit_builtin_cfgs_in_flags)]` on by default
+
+error: aborting due to 1 previous error
+
diff --git a/tests/ui/check-cfg/well-known-names.stderr b/tests/ui/check-cfg/well-known-names.stderr
index d946377f2616b..ec1345fd6b3cd 100644
--- a/tests/ui/check-cfg/well-known-names.stderr
+++ b/tests/ui/check-cfg/well-known-names.stderr
@@ -26,8 +26,8 @@ LL | #[cfg(list_all_well_known_cfgs)]
 `target_family`
 `target_feature`
 `target_has_atomic`
-`target_has_atomic_equal_alignment`
 `target_has_atomic_load_store`
+`target_has_atomic_primitive_alignment`
 `target_object_format`
 `target_os`
 `target_pointer_width`
diff --git a/tests/ui/check-cfg/well-known-values.rs b/tests/ui/check-cfg/well-known-values.rs
index 5cb5a88745996..5f961989d720f 100644
--- a/tests/ui/check-cfg/well-known-values.rs
+++ b/tests/ui/check-cfg/well-known-values.rs
@@ -64,10 +64,10 @@
     // ^ tested in target_feature.rs
     target_has_atomic = "_UNEXPECTED_VALUE",
     //~^ WARN unexpected `cfg` condition value
-    target_has_atomic_equal_alignment = "_UNEXPECTED_VALUE",
-    //~^ WARN unexpected `cfg` condition value
     target_has_atomic_load_store = "_UNEXPECTED_VALUE",
     //~^ WARN unexpected `cfg` condition value
+    target_has_atomic_primitive_alignment = "_UNEXPECTED_VALUE",
+    //~^ WARN unexpected `cfg` condition value
     target_object_format = "_UNEXPECTED_VALUE",
     //~^ WARN unexpected `cfg` condition value
     target_os = "_UNEXPECTED_VALUE",
diff --git a/tests/ui/check-cfg/well-known-values.stderr b/tests/ui/check-cfg/well-known-values.stderr
index a35d8b025b8e2..345fd345a6dba 100644
--- a/tests/ui/check-cfg/well-known-values.stderr
+++ b/tests/ui/check-cfg/well-known-values.stderr
@@ -180,19 +180,19 @@ LL |     target_has_atomic = "_UNEXPECTED_VALUE",
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
   --> $DIR/well-known-values.rs:67:5
    |
-LL |     target_has_atomic_equal_alignment = "_UNEXPECTED_VALUE",
-   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+LL |     target_has_atomic_load_store = "_UNEXPECTED_VALUE",
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    |
-   = note: expected values for `target_has_atomic_equal_alignment` are: (none), `128`, `16`, `32`, `64`, `8`, and `ptr`
+   = note: expected values for `target_has_atomic_load_store` are: (none), `128`, `16`, `32`, `64`, `8`, and `ptr`
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
   --> $DIR/well-known-values.rs:69:5
    |
-LL |     target_has_atomic_load_store = "_UNEXPECTED_VALUE",
-   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+LL |     target_has_atomic_primitive_alignment = "_UNEXPECTED_VALUE",
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    |
-   = note: expected values for `target_has_atomic_load_store` are: (none), `128`, `16`, `32`, `64`, `8`, and `ptr`
+   = note: expected values for `target_has_atomic_primitive_alignment` are: (none), `128`, `16`, `32`, `64`, `8`, and `ptr`
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
diff --git a/tests/ui/stdlib-unit-tests/atomic-from-mut-not-available.rs b/tests/ui/stdlib-unit-tests/atomic-from-mut-not-available.rs
index b7c7f0ed98241..e8aad9cbeb9fa 100644
--- a/tests/ui/stdlib-unit-tests/atomic-from-mut-not-available.rs
+++ b/tests/ui/stdlib-unit-tests/atomic-from-mut-not-available.rs
@@ -1,11 +1,11 @@
-//! This test exercises the combined effect of the `cfg(target_has_atomic_equal_alignment = "...")`
-//! implementation in the compiler plus usage of said `cfg(target_has_atomic_equal_alignment)` in
-//! `core` for the `Atomic64::from_mut` API.
+//! This test exercises the combined effect of the
+//! `cfg(target_has_atomic_primitive_alignment = "...")` implementation in the compiler plus usage
+//! of said `cfg(target_has_atomic_primitive_alignment)` in `core` for the `Atomic64::from_mut` API.
 //!
 //! This test is a basic smoke test: that `AtomicU64::from_mut` is gated by
-//! `#[cfg(target_has_atomic_equal_alignment = "8")]`, which is only available on platforms where
-//! `AtomicU64` has the same alignment as `u64`. This is notably *not* satisfied by `x86_32`, where
-//! they have differing alignments. Thus, `AtomicU64::from_mut` should *not* be available on
+//! `#[cfg(target_has_atomic_primitive_alignment = "64")]`, which is only available on platforms
+//! where `AtomicU64` has the same alignment as `u64`. This is notably *not* satisfied by `x86_32`,
+//! where they have differing alignments. Thus, `AtomicU64::from_mut` should *not* be available on
 //! `x86_32` linux and should report assoc item not found, if the `cfg` is working correctly.
 //! Conversely, `AtomicU64::from_mut` *should* be available on `x86_64` linux where the alignment
 //! matches.

From 0e97ab333b3dd3b170412d523eb6076c3835e512 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Mon, 11 May 2026 21:28:47 +0530
Subject: [PATCH 49/78] Correct small typo in gen-arm

---
 .../stdarch-gen-arm/spec/neon/aarch64.spec.yml     | 14 ++++++--------
 .../crates/stdarch-gen-arm/src/fn_suffix.rs        |  2 +-
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index cfd44332ec304..102447eae8ddc 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -6131,7 +6131,7 @@ intrinsics:
           - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
       - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
 
-  - name: "vcmla{type[3]}"
+  - name: "vcmla{neon_type[0].rot180_lane}"
     doc: Floating-point complex multiply accumulate
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
@@ -6143,8 +6143,8 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f32']
-      - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f32']
+      - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
       - Let:
@@ -6153,7 +6153,7 @@ intrinsics:
           - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
       - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
 
-  - name: "vcmla{type[3]}"
+  - name: "vcmla{neon_type[0].rot180_lane}"
     doc: Floating-point complex multiply accumulate
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
@@ -6167,10 +6167,8 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f16']
-      - [float16x8_t, float16x4_t,
-          '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f16'
-        ]
+      - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
       - Let:
diff --git a/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs b/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs
index 26c156ae178aa..6fba3dc74476e 100644
--- a/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs
+++ b/library/stdarch/crates/stdarch-gen-arm/src/fn_suffix.rs
@@ -188,7 +188,7 @@ impl FromStr for SuffixKind {
             "rot90_lane" => Ok(SuffixKind::Rot90Lane),
             "rot90_laneq" => Ok(SuffixKind::Rot90LaneQ),
             "rot180" => Ok(SuffixKind::Rot180),
-            "rot180_lane" => Ok(SuffixKind::Rot180LaneQ),
+            "rot180_lane" => Ok(SuffixKind::Rot180Lane),
             "rot180_laneq" => Ok(SuffixKind::Rot180LaneQ),
             "u" => Ok(SuffixKind::Unsigned),
             "nox" => Ok(SuffixKind::NoX),

From ffe6e2ad98aa769d84cd10aa4fe1e3ae40454713 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Mon, 11 May 2026 21:29:36 +0530
Subject: [PATCH 50/78] Implement `vcmla_lane` with ARM intrinsics

---
 .../core_arch/src/aarch64/neon/generated.rs   | 504 +++++-------------
 .../spec/neon/aarch64.spec.yml                | 172 +++---
 2 files changed, 208 insertions(+), 468 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 11c3a52870493..3241583cf07ca 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -3001,19 +3001,10 @@ pub fn vcmla_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f16)"]
@@ -3030,23 +3021,10 @@ pub fn vcmlaq_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f32)"]
@@ -3061,10 +3039,10 @@ pub fn vcmla_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdup_lane_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f32)"]
@@ -3079,19 +3057,10 @@ pub fn vcmlaq_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdupq_lane_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f16)"]
@@ -3108,19 +3077,10 @@ pub fn vcmla_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f16)"]
@@ -3137,23 +3097,10 @@ pub fn vcmlaq_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f32)"]
@@ -3168,10 +3115,10 @@ pub fn vcmla_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdup_laneq_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f32)"]
@@ -3186,19 +3133,10 @@ pub fn vcmlaq_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdupq_laneq_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f16)"]
@@ -3299,19 +3237,10 @@ pub fn vcmla_rot180_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot180_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot180_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f16)"]
@@ -3328,23 +3257,10 @@ pub fn vcmlaq_rot180_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot180_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot180_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f32)"]
@@ -3359,10 +3275,10 @@ pub fn vcmla_rot180_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot180_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdup_lane_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot180_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f32)"]
@@ -3377,19 +3293,10 @@ pub fn vcmlaq_rot180_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot180_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdupq_lane_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot180_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f16)"]
@@ -3406,19 +3313,10 @@ pub fn vcmla_rot180_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot180_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot180_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f16)"]
@@ -3435,23 +3333,10 @@ pub fn vcmlaq_rot180_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot180_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot180_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f32)"]
@@ -3466,10 +3351,10 @@ pub fn vcmla_rot180_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot180_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdup_laneq_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot180_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f32)"]
@@ -3484,19 +3369,10 @@ pub fn vcmlaq_rot180_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot180_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdupq_laneq_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot180_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f16)"]
@@ -3597,19 +3473,10 @@ pub fn vcmla_rot270_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot270_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot270_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f16)"]
@@ -3626,23 +3493,10 @@ pub fn vcmlaq_rot270_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot270_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot270_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f32)"]
@@ -3657,10 +3511,10 @@ pub fn vcmla_rot270_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot270_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdup_lane_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot270_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f32)"]
@@ -3675,19 +3529,10 @@ pub fn vcmlaq_rot270_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot270_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdupq_lane_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot270_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f16)"]
@@ -3704,19 +3549,10 @@ pub fn vcmla_rot270_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot270_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot270_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f16)"]
@@ -3733,23 +3569,10 @@ pub fn vcmlaq_rot270_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot270_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot270_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f32)"]
@@ -3764,10 +3587,10 @@ pub fn vcmla_rot270_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot270_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdup_laneq_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot270_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f32)"]
@@ -3782,19 +3605,10 @@ pub fn vcmlaq_rot270_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot270_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdupq_laneq_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot270_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f16)"]
@@ -3895,19 +3709,10 @@ pub fn vcmla_rot90_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot90_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot90_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f16)"]
@@ -3924,23 +3729,10 @@ pub fn vcmlaq_rot90_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot90_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot90_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f32)"]
@@ -3955,10 +3747,10 @@ pub fn vcmla_rot90_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot90_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdup_lane_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot90_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f32)"]
@@ -3973,19 +3765,10 @@ pub fn vcmlaq_rot90_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot90_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdupq_lane_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot90_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f16)"]
@@ -4002,19 +3785,10 @@ pub fn vcmla_rot90_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot90_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot90_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f16)"]
@@ -4031,23 +3805,10 @@ pub fn vcmlaq_rot90_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot90_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot90_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f32)"]
@@ -4062,10 +3823,10 @@ pub fn vcmla_rot90_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot90_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdup_laneq_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot90_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f32)"]
@@ -4080,19 +3841,10 @@ pub fn vcmlaq_rot90_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot90_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdupq_laneq_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot90_f32(a, b, c)
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f64)"]
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 102447eae8ddc..2f7f2fc2b0cba 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -5914,14 +5914,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x4_t, '']
+      - [float32x4_t, float32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].laneq_nox}"
@@ -5938,14 +5937,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x8_t, '']
+      - [float16x8_t, float16x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot90_laneq}"
@@ -5960,14 +5958,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x4_t, '']
+      - [float32x4_t, float32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot90_laneq}"
@@ -5984,14 +5981,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x8_t, '']
+      - [float16x8_t, float16x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot90_lane}"
@@ -6006,14 +6002,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x2_t, '']
+      - [float32x4_t, float32x2_t, 'q']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot90_lane}"
@@ -6030,14 +6025,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x4_t, '']
+      - [float16x8_t, float16x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
 
   - name: "vcmla{neon_type.rot180}"
@@ -6095,14 +6089,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x4_t, '']
+      - [float32x4_t, float32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot180_laneq}"
@@ -6119,16 +6112,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x8_t,
-        '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'
-        ]
+      - [float16x4_t, float16x8_t, '']
+      - [float16x8_t, float16x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot180_lane}"
@@ -6143,14 +6133,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x2_t, '']
+      - [float32x4_t, float32x2_t, 'q']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot180_lane}"
@@ -6167,14 +6156,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x4_t, '']
+      - [float16x8_t, float16x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot270_laneq}"
@@ -6189,14 +6177,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x4_t, '']
+      - [float32x4_t, float32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot270_laneq}"
@@ -6213,14 +6200,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x8_t, '']
+      - [float16x8_t, float16x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].lane_nox}"
@@ -6235,14 +6221,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x2_t, '']
+      - [float32x4_t, float32x2_t, 'q']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
 
 
@@ -6260,14 +6245,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x4_t, '']
+      - [float16x8_t, float16x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot270_lane}"
@@ -6282,11 +6266,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x2_t, '']
+      - [float32x4_t, float32x2_t, 'q']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}]
+      - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot270_lane}"
@@ -6303,11 +6289,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x4_t, '']
+      - [float16x8_t, float16x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}]
+      - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
 
   - name: "vmax{neon_type.no}"

From c477e713b6f8d93dc904e3eda97e71366a81990c Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Mon, 9 Mar 2026 05:55:05 +0530
Subject: [PATCH 51/78] Remove code for compiling and comparing C and Rust
 files, made the C files wrappers

---
 .../crates/intrinsic-test/src/arm/compile.rs  |  51 -----
 .../crates/intrinsic-test/src/arm/config.rs   |  45 -----
 .../crates/intrinsic-test/src/arm/mod.rs      |  10 +-
 .../crates/intrinsic-test/src/arm/types.rs    | 111 +---------
 .../intrinsic-test/src/common/argument.rs     |  84 +++-----
 .../crates/intrinsic-test/src/common/cli.rs   |  65 +-----
 .../intrinsic-test/src/common/compare.rs      | 144 -------------
 .../intrinsic-test/src/common/compile_c.rs    | 136 -------------
 .../crates/intrinsic-test/src/common/gen_c.rs | 165 ++++-----------
 .../intrinsic-test/src/common/gen_rust.rs     | 104 ----------
 .../src/common/intrinsic_helpers.rs           | 134 +------------
 .../crates/intrinsic-test/src/common/mod.rs   | 125 +-----------
 .../stdarch/crates/intrinsic-test/src/main.rs |  28 +--
 .../crates/intrinsic-test/src/x86/compile.rs  |  59 ------
 .../crates/intrinsic-test/src/x86/config.rs   | 189 ------------------
 .../crates/intrinsic-test/src/x86/mod.rs      |   8 -
 .../crates/intrinsic-test/src/x86/types.rs    | 155 +-------------
 17 files changed, 90 insertions(+), 1523 deletions(-)
 delete mode 100644 library/stdarch/crates/intrinsic-test/src/arm/compile.rs
 delete mode 100644 library/stdarch/crates/intrinsic-test/src/common/compare.rs
 delete mode 100644 library/stdarch/crates/intrinsic-test/src/common/compile_c.rs
 delete mode 100644 library/stdarch/crates/intrinsic-test/src/x86/compile.rs

diff --git a/library/stdarch/crates/intrinsic-test/src/arm/compile.rs b/library/stdarch/crates/intrinsic-test/src/arm/compile.rs
deleted file mode 100644
index a672da2cc0d6b..0000000000000
--- a/library/stdarch/crates/intrinsic-test/src/arm/compile.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use crate::common::cli::ProcessedCli;
-use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation};
-
-pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
-    let cpp_compiler = config.cpp_compiler.as_ref()?;
-
-    // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
-    let mut command = CompilationCommandBuilder::new()
-        .add_arch_flags(["armv8.6-a", "crypto", "crc", "dotprod", "fp16"])
-        .set_compiler(cpp_compiler)
-        .set_target(&config.target)
-        .set_opt_level("2")
-        .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref())
-        .set_project_root("c_programs")
-        .add_extra_flags(["-ffp-contract=off", "-Wno-narrowing"]);
-
-    if !config.target.contains("v7") {
-        command = command.add_arch_flags(["faminmax", "lut", "sha3", "fp8"]);
-    }
-
-    if !cpp_compiler.contains("clang") {
-        command = command.add_extra_flag("-flax-vector-conversions");
-    }
-
-    let mut cpp_compiler = command.into_cpp_compilation();
-
-    if config.target.contains("aarch64_be") {
-        let Some(ref cxx_toolchain_dir) = config.cxx_toolchain_dir else {
-            panic!(
-                "target `{}` must specify `cxx_toolchain_dir`",
-                config.target
-            )
-        };
-
-        cpp_compiler.command_mut().args([
-            &format!("--sysroot={cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc"),
-            "--include-directory",
-            &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1"),
-            "--include-directory",
-            &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1/aarch64_be-none-linux-gnu"),
-            "-L",
-            &format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"),
-            "-L",
-            &format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc/usr/lib"),
-            "-B",
-            &format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"),
-        ]);
-    }
-
-    Some(cpp_compiler)
-}
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/config.rs b/library/stdarch/crates/intrinsic-test/src/arm/config.rs
index 60bb0ca56cefa..9371db737ea22 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/config.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/config.rs
@@ -3,51 +3,6 @@ pub const NOTICE: &str = "\
 // test are derived from a JSON specification, published under the same license as the
 // `intrinsic-test` crate.\n";
 
-pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#"
-#ifdef __aarch64__
-std::ostream& operator<<(std::ostream& os, poly128_t value);
-#endif
-
-std::ostream& operator<<(std::ostream& os, float16_t value);
-std::ostream& operator<<(std::ostream& os, uint8_t value);
-
-// T1 is the `To` type, T2 is the `From` type
-template<typename T1, typename T2> T1 cast(T2 x) {
-  static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
-  T1 ret{};
-  memcpy(&ret, &x, sizeof(T1));
-  return ret;
-}
-"#;
-
-pub const PLATFORM_C_DEFINITIONS: &str = r#"
-#ifdef __aarch64__
-std::ostream& operator<<(std::ostream& os, poly128_t value) {
-    std::stringstream temp;
-    do {
-      int n = value % 10;
-      value /= 10;
-      temp << n;
-    } while (value != 0);
-    std::string tempstr(temp.str());
-    std::string res(tempstr.rbegin(), tempstr.rend());
-    os << res;
-    return os;
-}
-
-#endif
-
-std::ostream& operator<<(std::ostream& os, float16_t value) {
-    os << static_cast<float>(value);
-    return os;
-}
-
-std::ostream& operator<<(std::ostream& os, uint8_t value) {
-    os << (unsigned int) value;
-    return os;
-}
-"#;
-
 pub const PLATFORM_RUST_DEFINITIONS: &str = "";
 
 pub const PLATFORM_RUST_CFGS: &str = r#"
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
index 99c8da854c506..e365a5372f92e 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
@@ -1,5 +1,4 @@
 mod argument;
-mod compile;
 mod config;
 mod intrinsic;
 mod json_parser;
@@ -7,7 +6,6 @@ mod types;
 
 use crate::common::SupportedArchitectureTest;
 use crate::common::cli::ProcessedCli;
-use crate::common::compile_c::CppCompilation;
 use crate::common::intrinsic::Intrinsic;
 use crate::common::intrinsic_helpers::TypeKind;
 use intrinsic::ArmIntrinsicType;
@@ -32,18 +30,12 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
     const NOTICE: &str = config::NOTICE;
 
     const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"];
-    const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS;
-    const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS;
 
     const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS;
     const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS;
 
-    fn cpp_compilation(&self) -> Option<CppCompilation> {
-        compile::build_cpp_compilation(&self.cli_options)
-    }
-
     fn create(cli_options: ProcessedCli) -> Self {
-        let a32 = cli_options.target.contains("v7");
+        let a32 = cli_options.target.starts_with("armv7");
         let mut intrinsics = get_neon_intrinsics(&cli_options.filename, &cli_options.target)
             .expect("Error parsing input file");
 
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/types.rs b/library/stdarch/crates/intrinsic-test/src/arm/types.rs
index 18468bd5581f8..8424379db59f2 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/types.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/types.rs
@@ -1,6 +1,4 @@
 use super::intrinsic::ArmIntrinsicType;
-use crate::common::cli::Language;
-use crate::common::indentation::Indentation;
 use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
 
 impl IntrinsicTypeDefinition for ArmIntrinsicType {
@@ -20,19 +18,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
         }
     }
 
-    fn c_single_vector_type(&self) -> String {
-        if let (Some(bit_len), Some(simd_len)) = (self.bit_len, self.simd_len) {
-            format!(
-                "{prefix}{bit_len}x{simd_len}_t",
-                prefix = self.kind.c_prefix()
-            )
-        } else {
-            unreachable!("Shouldn't be called on this type")
-        }
-    }
-
     /// Determines the load function for this type.
-    fn get_load_function(&self, language: Language) -> String {
+    fn get_load_function(&self) -> String {
         if let IntrinsicType {
             kind: k,
             bit_len: Some(bl),
@@ -47,16 +34,13 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
                 ""
             };
 
-            let choose_workaround = language == Language::C && self.target.contains("v7");
             format!(
                 "vld{len}{quad}_{type}{size}",
                 type = match k {
                     TypeKind::Int(Sign::Unsigned) => "u",
                     TypeKind::Int(Sign::Signed) => "s",
                     TypeKind::Float => "f",
-                    // The ACLE doesn't support 64-bit polynomial loads on Armv7
-                    // if armv7 and bl == 64, use "s", else "p"
-                    TypeKind::Poly => if choose_workaround && *bl == 64 {"s"} else {"p"},
+                    TypeKind::Poly => "p",
                     x => todo!("get_load_function TypeKind: {x:#?}"),
                 },
                 size = bl,
@@ -67,97 +51,6 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
             todo!("get_load_function IntrinsicType: {self:#?}")
         }
     }
-
-    /// Determines the get lane function for this type.
-    fn get_lane_function(&self) -> String {
-        if let IntrinsicType {
-            kind: k,
-            bit_len: Some(bl),
-            simd_len,
-            ..
-        } = &self.data
-        {
-            let quad = if (simd_len.unwrap_or(1) * bl) > 64 {
-                "q"
-            } else {
-                ""
-            };
-            format!(
-                "vget{quad}_lane_{type}{size}",
-                type = match k {
-                    TypeKind::Int(Sign::Unsigned) => "u",
-                    TypeKind::Int(Sign::Signed) => "s",
-                    TypeKind::Float => "f",
-                    TypeKind::Poly => "p",
-                    x => todo!("get_load_function TypeKind: {x:#?}"),
-                },
-                size = bl,
-                quad = quad,
-            )
-        } else {
-            todo!("get_lane_function IntrinsicType: {self:#?}")
-        }
-    }
-
-    /// Generates a std::cout for the intrinsics results that will match the
-    /// rust debug output format for the return type. The generated line assumes
-    /// there is an int i in scope which is the current pass number.
-    fn print_result_c(&self, indentation: Indentation, additional: &str) -> String {
-        let lanes = if self.num_vectors() > 1 {
-            (0..self.num_vectors())
-                .map(|vector| {
-                    format!(
-                        r#""{ty}(" << {lanes} << ")""#,
-                        ty = self.c_single_vector_type(),
-                        lanes = (0..self.num_lanes())
-                            .map(move |idx| -> std::string::String {
-                                let lane_fn = self.get_lane_function();
-                                let final_cast = self.generate_final_type_cast();
-                                format!(
-                                    "{final_cast}{lane_fn}(__return_value.val[{vector}], {idx})"
-                                )
-                            })
-                            .collect::<Vec<_>>()
-                            .join(r#" << ", " << "#)
-                    )
-                })
-                .collect::<Vec<_>>()
-                .join(r#" << ", " << "#)
-        } else if self.num_lanes() > 1 {
-            (0..self.num_lanes())
-                .map(|idx| -> std::string::String {
-                    let lane_fn = self.get_lane_function();
-                    let final_cast = self.generate_final_type_cast();
-                    format!("{final_cast}{lane_fn}(__return_value, {idx})")
-                })
-                .collect::<Vec<_>>()
-                .join(r#" << ", " << "#)
-        } else {
-            format!(
-                "{promote}cast<{cast}>(__return_value)",
-                cast = match self.kind() {
-                    TypeKind::Float if self.inner_size() == 16 => "float16_t".to_string(),
-                    TypeKind::Float if self.inner_size() == 32 => "float".to_string(),
-                    TypeKind::Float if self.inner_size() == 64 => "double".to_string(),
-                    TypeKind::Int(Sign::Signed) => format!("int{}_t", self.inner_size()),
-                    TypeKind::Int(Sign::Unsigned) => format!("uint{}_t", self.inner_size()),
-                    TypeKind::Poly => format!("poly{}_t", self.inner_size()),
-                    ty => todo!("print_result_c - Unknown type: {ty:#?}"),
-                },
-                promote = self.generate_final_type_cast(),
-            )
-        };
-
-        format!(
-            r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) <<  {lanes} << "{close}" << std::endl;"#,
-            ty = if self.is_simd() {
-                format!("{}(", self.c_type())
-            } else {
-                String::from("")
-            },
-            close = if self.is_simd() { ")" } else { "" },
-        )
-    }
 }
 
 impl ArmIntrinsicType {
diff --git a/library/stdarch/crates/intrinsic-test/src/common/argument.rs b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
index 8ae9869db0621..58fd08460f14c 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/argument.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
@@ -1,4 +1,5 @@
-use super::cli::Language;
+use itertools::Itertools;
+
 use super::constraint::Constraint;
 use super::gen_rust::PASSES;
 use super::indentation::Indentation;
@@ -73,10 +74,6 @@ where
             format!("{}_vals", self.name.to_lowercase())
         }
     }
-
-    fn as_call_param_c(&self) -> String {
-        self.ty.as_call_param_c(&self.generate_name())
-    }
 }
 
 #[derive(Debug, PartialEq, Clone)]
@@ -88,13 +85,26 @@ impl<T> ArgumentList<T>
 where
     T: IntrinsicTypeDefinition,
 {
-    /// Converts the argument list into the call parameters for a C function call.
-    /// e.g. this would generate something like `a, &b, c`
-    pub fn as_call_param_c(&self) -> String {
+    pub fn as_non_imm_arglist_c(&self) -> String {
         self.iter()
-            .map(|arg| arg.as_call_param_c())
-            .collect::<Vec<String>>()
-            .join(", ")
+            .filter(|arg| !arg.has_constraint())
+            .format_with(", ", |arg, fmt| {
+                fmt(&format_args!("{} {}", arg.to_c_type(), arg.name))
+            })
+            .to_string()
+    }
+
+    pub fn as_call_params_c(&self, imm_args: &[i64]) -> String {
+        let mut imm_args = imm_args.iter();
+        self.iter()
+            .format_with(", ", |arg, fmt| {
+                if arg.has_constraint() {
+                    fmt(&imm_args.next().unwrap())
+                } else {
+                    fmt(&arg.name)
+                }
+            })
+            .to_string()
     }
 
     /// Converts the argument list into the call parameters for a Rust function.
@@ -107,30 +117,6 @@ where
             .join(", ")
     }
 
-    /// Creates a line for each argument that initializes an array for C from which `loads` argument
-    /// values can be loaded  as a sliding window.
-    /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2.
-    pub fn gen_arglists_c(
-        &self,
-        w: &mut impl std::io::Write,
-        indentation: Indentation,
-        loads: u32,
-    ) -> std::io::Result<()> {
-        for arg in self.iter().filter(|&arg| !arg.has_constraint()) {
-            // Setting the variables on an aligned boundary to make it easier to pick
-            // functions (of a specific architecture) that would help load the values.
-            writeln!(
-                w,
-                "{indentation}alignas(64) const {ty} {name}_vals[] = {values};",
-                ty = arg.ty.c_scalar_type(),
-                name = arg.generate_name(),
-                values = arg.ty.populate_random(indentation, loads, &Language::C)
-            )?
-        }
-
-        Ok(())
-    }
-
     /// Creates a line for each argument that initializes an array for Rust from which `loads` argument
     /// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20]  = [...];`
     pub fn gen_arglists_rust(
@@ -164,34 +150,10 @@ where
             name = arg.rust_vals_array_name(),
             ty = arg.ty.rust_scalar_type(),
             load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1,
-            values = arg.ty.populate_random(indentation, loads, &Language::Rust)
+            values = arg.ty.populate_random(indentation, loads)
         )
     }
 
-    /// Creates a line for each argument that initializes the argument from an array `[arg]_vals` at
-    /// an offset `i` using a load intrinsic, in C.
-    /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);`
-    ///
-    /// ARM-specific
-    pub fn load_values_c(&self, indentation: Indentation) -> String {
-        self.iter()
-            .filter(|&arg| !arg.has_constraint())
-            .enumerate()
-            .map(|(idx, arg)| {
-                format!(
-                    "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[(i+{idx}) % {PASSES}]));\n",
-                    ty = arg.to_c_type(),
-                    name = arg.generate_name(),
-                    load = if arg.is_simd() {
-                        arg.ty.get_load_function(Language::C)
-                    } else {
-                        "*".to_string()
-                    }
-                )
-            })
-            .collect()
-    }
-
     /// Creates a line for each argument that initializes the argument from array `[ARG]_VALS` at
     /// an offset `i` using a load intrinsic, in Rust.
     /// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));`
@@ -205,7 +167,7 @@ where
                         "{indentation}let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n",
                         name = arg.generate_name(),
                         vals_name = arg.rust_vals_array_name(),
-                        load = arg.ty.get_load_function(Language::Rust),
+                        load = arg.ty.get_load_function(),
                     )
                 } else {
                     format!(
diff --git a/library/stdarch/crates/intrinsic-test/src/common/cli.rs b/library/stdarch/crates/intrinsic-test/src/common/cli.rs
index bed8259de8b6d..f407b5ceb7d48 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/cli.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/cli.rs
@@ -1,12 +1,6 @@
 use itertools::Itertools;
 use std::path::PathBuf;
 
-#[derive(Debug, PartialEq)]
-pub enum Language {
-    Rust,
-    C,
-}
-
 /// Intrinsic test tool
 #[derive(clap::Parser)]
 #[command(
@@ -17,41 +11,13 @@ pub struct Cli {
     /// The input file containing the intrinsics
     pub input: PathBuf,
 
-    /// The rust toolchain to use for building the rust code
-    #[arg(long)]
-    pub toolchain: Option<String>,
-
-    /// The C++ compiler to use for compiling the c++ code
-    #[arg(long, default_value_t = String::from("clang++"))]
-    pub cppcompiler: String,
-
-    /// Run the C programs under emulation with this command
-    #[arg(long)]
-    pub runner: Option<String>,
-
     /// Filename for a list of intrinsics to skip (one per line)
     #[arg(long)]
     pub skip: Option<PathBuf>,
 
-    /// Regenerate test programs, but don't build or run them
-    #[arg(long)]
-    pub generate_only: bool,
-
     /// Pass a target the test suite
-    #[arg(long, default_value_t = String::from("armv7-unknown-linux-gnueabihf"))]
-    pub target: String,
-
-    /// Pass a profile (release, dev)
-    #[arg(long, default_value_t = String::from("release"))]
-    pub profile: String,
-
-    /// Set the linker
-    #[arg(long)]
-    pub linker: Option<String>,
-
-    /// Set the sysroot for the C++ compiler
     #[arg(long)]
-    pub cxx_toolchain_dir: Option<String>,
+    pub target: String,
 
     #[arg(long, default_value_t = 100u8)]
     pub sample_percentage: u8,
@@ -59,13 +25,7 @@ pub struct Cli {
 
 pub struct ProcessedCli {
     pub filename: PathBuf,
-    pub toolchain: Option<String>,
-    pub cpp_compiler: Option<String>,
-    pub runner: String,
     pub target: String,
-    pub profile: String,
-    pub linker: Option<String>,
-    pub cxx_toolchain_dir: Option<String>,
     pub skip: Vec<String>,
     pub sample_percentage: u8,
 }
@@ -73,11 +33,7 @@ pub struct ProcessedCli {
 impl ProcessedCli {
     pub fn new(cli_options: Cli) -> Self {
         let filename = cli_options.input;
-        let runner = cli_options.runner.unwrap_or_default();
         let target = cli_options.target;
-        let profile = cli_options.profile;
-        let linker = cli_options.linker;
-        let cxx_toolchain_dir = cli_options.cxx_toolchain_dir;
         let sample_percentage = cli_options.sample_percentage;
 
         let skip = if let Some(filename) = cli_options.skip {
@@ -91,27 +47,8 @@ impl ProcessedCli {
             Default::default()
         };
 
-        let (toolchain, cpp_compiler) = if cli_options.generate_only {
-            (None, None)
-        } else {
-            (
-                Some(
-                    cli_options
-                        .toolchain
-                        .map_or_else(String::new, |t| format!("+{t}")),
-                ),
-                Some(cli_options.cppcompiler),
-            )
-        };
-
         Self {
-            toolchain,
-            cpp_compiler,
-            runner,
             target,
-            profile,
-            linker,
-            cxx_toolchain_dir,
             skip,
             filename,
             sample_percentage,
diff --git a/library/stdarch/crates/intrinsic-test/src/common/compare.rs b/library/stdarch/crates/intrinsic-test/src/common/compare.rs
deleted file mode 100644
index c1438d1bbf8ce..0000000000000
--- a/library/stdarch/crates/intrinsic-test/src/common/compare.rs
+++ /dev/null
@@ -1,144 +0,0 @@
-use itertools::Itertools;
-use rayon::prelude::*;
-use std::{collections::HashMap, process::Command};
-
-pub const INTRINSIC_DELIMITER: &str = "############";
-fn runner_command(runner: &str) -> Command {
-    let mut it = runner.split_whitespace();
-    let mut cmd = Command::new(it.next().unwrap());
-    cmd.args(it);
-
-    cmd
-}
-
-pub fn compare_outputs(
-    intrinsic_name_list: &Vec<String>,
-    runner: &str,
-    target: &str,
-    profile: &str,
-) -> bool {
-    let profile_dir = match profile {
-        "dev" => "debug",
-        _ => "release",
-    };
-
-    let (c, rust) = rayon::join(
-        || {
-            runner_command(runner)
-                .arg("./intrinsic-test-programs")
-                .current_dir("c_programs")
-                .output()
-        },
-        || {
-            runner_command(runner)
-                .arg(format!(
-                    "./target/{target}/{profile_dir}/intrinsic-test-programs"
-                ))
-                .current_dir("rust_programs")
-                .output()
-        },
-    );
-    let (c, rust) = match (c, rust) {
-        (Ok(c), Ok(rust)) => (c, rust),
-        failure => panic!("Failed to run: {failure:#?}"),
-    };
-
-    if !c.status.success() {
-        error!(
-            "Failed to run C program.\nstdout: {stdout}\nstderr: {stderr}",
-            stdout = std::str::from_utf8(&c.stdout).unwrap_or(""),
-            stderr = std::str::from_utf8(&c.stderr).unwrap_or(""),
-        );
-    }
-
-    if !rust.status.success() {
-        error!(
-            "Failed to run Rust program.\nstdout: {stdout}\nstderr: {stderr}",
-            stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""),
-            stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""),
-        );
-    }
-
-    info!("Completed running C++ and Rust test binaries");
-    let c = std::str::from_utf8(&c.stdout)
-        .unwrap()
-        .to_lowercase()
-        .replace("-nan", "nan");
-    let rust = std::str::from_utf8(&rust.stdout)
-        .unwrap()
-        .to_lowercase()
-        .replace("-nan", "nan");
-
-    let c_output_map = c
-        .split(INTRINSIC_DELIMITER)
-        .filter_map(|output| output.trim().split_once("\n"))
-        .collect::<HashMap<&str, &str>>();
-    let rust_output_map = rust
-        .split(INTRINSIC_DELIMITER)
-        .filter_map(|output| output.trim().split_once("\n"))
-        .collect::<HashMap<&str, &str>>();
-
-    assert!(!c_output_map.is_empty(), "No C intrinsic output found!");
-
-    let intrinsics = c_output_map
-        .keys()
-        .chain(rust_output_map.keys())
-        .unique()
-        .collect_vec();
-
-    info!("Comparing outputs");
-    let intrinsics_diff_count = intrinsics
-        .par_iter()
-        .filter_map(|&&intrinsic| {
-            let c_output = c_output_map.get(intrinsic).unwrap();
-            let rust_output = rust_output_map.get(intrinsic).unwrap();
-            if rust_output.eq(c_output) {
-                None
-            } else {
-                let diff = diff::lines(c_output, rust_output);
-                let diffs = diff
-                    .into_iter()
-                    .filter_map(|diff| match diff {
-                        diff::Result::Left(_) | diff::Result::Right(_) => Some(diff),
-                        diff::Result::Both(_, _) => None,
-                    })
-                    .collect_vec();
-                if diffs.len() > 0 {
-                    Some((intrinsic, diffs))
-                } else {
-                    None
-                }
-            }
-        })
-        .inspect(|(intrinsic, diffs)| {
-            use std::io::Write;
-
-            let stdout = std::io::stdout();
-            let mut out = stdout.lock();
-
-            writeln!(out, "Difference for intrinsic: {intrinsic}").unwrap();
-            diffs.into_iter().for_each(|diff| match diff {
-                diff::Result::Left(c) => {
-                    writeln!(out, "C: {c}").unwrap();
-                }
-                diff::Result::Right(rust) => {
-                    writeln!(out, "Rust: {rust}").unwrap();
-                }
-                _ => (),
-            });
-            writeln!(
-                out,
-                "****************************************************************"
-            )
-            .unwrap();
-        })
-        .count();
-
-    println!(
-        "{} differences found (tested {} intrinsics)",
-        intrinsics_diff_count,
-        intrinsic_name_list.len()
-    );
-
-    intrinsics_diff_count == 0
-}
diff --git a/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs b/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs
deleted file mode 100644
index fa78b332a7857..0000000000000
--- a/library/stdarch/crates/intrinsic-test/src/common/compile_c.rs
+++ /dev/null
@@ -1,136 +0,0 @@
-#[derive(Clone)]
-pub struct CompilationCommandBuilder {
-    compiler: String,
-    target: Option<String>,
-    cxx_toolchain_dir: Option<String>,
-    arch_flags: Vec<String>,
-    optimization: String,
-    project_root: Option<String>,
-    extra_flags: Vec<String>,
-}
-
-impl CompilationCommandBuilder {
-    pub fn new() -> Self {
-        Self {
-            compiler: String::new(),
-            target: None,
-            cxx_toolchain_dir: None,
-            arch_flags: Vec::new(),
-            optimization: "2".to_string(),
-            project_root: None,
-            extra_flags: Vec::new(),
-        }
-    }
-
-    pub fn set_compiler(mut self, compiler: &str) -> Self {
-        self.compiler = compiler.to_string();
-        self
-    }
-
-    pub fn set_target(mut self, target: &str) -> Self {
-        self.target = Some(target.to_string());
-        self
-    }
-
-    pub fn set_cxx_toolchain_dir(mut self, path: Option<&str>) -> Self {
-        self.cxx_toolchain_dir = path.map(|p| p.to_string());
-        self
-    }
-
-    pub fn add_arch_flags<'a>(mut self, flags: impl IntoIterator<Item = &'a str>) -> Self {
-        self.arch_flags
-            .extend(flags.into_iter().map(|s| s.to_owned()));
-
-        self
-    }
-
-    pub fn set_opt_level(mut self, optimization: &str) -> Self {
-        self.optimization = optimization.to_string();
-        self
-    }
-
-    /// Sets the root path of all the generated test files.
-    pub fn set_project_root(mut self, path: &str) -> Self {
-        self.project_root = Some(path.to_string());
-        self
-    }
-
-    pub fn add_extra_flags<'a>(mut self, flags: impl IntoIterator<Item = &'a str>) -> Self {
-        self.extra_flags
-            .extend(flags.into_iter().map(|s| s.to_owned()));
-
-        self
-    }
-
-    pub fn add_extra_flag(self, flag: &str) -> Self {
-        self.add_extra_flags([flag])
-    }
-}
-
-impl CompilationCommandBuilder {
-    pub fn into_cpp_compilation(self) -> CppCompilation {
-        let mut cpp_compiler = std::process::Command::new(self.compiler);
-
-        if let Some(project_root) = self.project_root {
-            cpp_compiler.current_dir(project_root);
-        }
-
-        let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
-        cpp_compiler.args(flags.split_whitespace());
-
-        cpp_compiler.arg(format!("-march={}", self.arch_flags.join("+")));
-
-        cpp_compiler.arg(format!("-O{}", self.optimization));
-
-        cpp_compiler.args(self.extra_flags);
-
-        if let Some(target) = &self.target {
-            cpp_compiler.arg(format!("--target={target}"));
-        }
-
-        CppCompilation(cpp_compiler)
-    }
-}
-
-pub struct CppCompilation(std::process::Command);
-
-fn clone_command(command: &std::process::Command) -> std::process::Command {
-    let mut cmd = std::process::Command::new(command.get_program());
-    if let Some(current_dir) = command.get_current_dir() {
-        cmd.current_dir(current_dir);
-    }
-    cmd.args(command.get_args());
-
-    for (key, val) in command.get_envs() {
-        cmd.env(key, val.unwrap_or_default());
-    }
-
-    cmd
-}
-
-impl CppCompilation {
-    pub fn command_mut(&mut self) -> &mut std::process::Command {
-        &mut self.0
-    }
-
-    pub fn compile_object_file(
-        &self,
-        input: &str,
-        output: &str,
-    ) -> std::io::Result<std::process::Output> {
-        let mut cmd = clone_command(&self.0);
-        cmd.args([input, "-v", "-c", "-o", output]);
-        cmd.output()
-    }
-
-    pub fn link_executable(
-        &self,
-        inputs: impl Iterator<Item = String>,
-        output: &str,
-    ) -> std::io::Result<std::process::Output> {
-        let mut cmd = clone_command(&self.0);
-        cmd.args(inputs);
-        cmd.args(["-o", output]);
-        cmd.output()
-    }
-}
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
index a95b4c36b7bf0..0b0df5830c482 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@@ -1,166 +1,71 @@
+use itertools::Itertools;
+
+use crate::common::constraint::Constraint;
 use crate::common::intrinsic::Intrinsic;
 
-use super::argument::Argument;
-use super::compare::INTRINSIC_DELIMITER;
-use super::indentation::Indentation;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
-// The number of times each intrinsic will be called.
-const PASSES: u32 = 20;
-const COMMON_HEADERS: [&str; 7] = [
-    "iostream",
-    "string",
-    "cstring",
-    "iomanip",
-    "sstream",
-    "type_traits",
-    "cassert",
-];
-
-pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
-    w: &mut impl std::io::Write,
-    intrinsic: &Intrinsic<T>,
-    indentation: Indentation,
-    additional: &str,
-    passes: u32,
-) -> std::io::Result<()> {
-    let body_indentation = indentation.nested();
-    writeln!(
-        w,
-        "{indentation}for (int i=0; i<{passes}; i++) {{\n\
-            {loaded_args}\
-            {body_indentation}auto __return_value = {intrinsic_call}({args});\n\
-            {print_result}\n\
-        {indentation}}}",
-        loaded_args = intrinsic.arguments.load_values_c(body_indentation),
-        intrinsic_call = intrinsic.name,
-        args = intrinsic.arguments.as_call_param_c(),
-        print_result = intrinsic
-            .results
-            .print_result_c(body_indentation, additional)
-    )
-}
-
-pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>(
+fn generate_c_wrapper<'a, T: IntrinsicTypeDefinition + 'a>(
     w: &mut impl std::io::Write,
     intrinsic: &Intrinsic<T>,
-    indentation: Indentation,
-    constraints: &mut (impl Iterator<Item = &'a Argument<T>> + Clone),
-    name: String,
+    constraints: &mut (impl Iterator<Item = &'a Constraint> + Clone),
+    imm_values: &mut Vec<i64>,
 ) -> std::io::Result<()> {
-    let Some(current) = constraints.next() else {
-        return generate_c_test_loop(w, intrinsic, indentation, &name, PASSES);
-    };
-
-    let body_indentation = indentation.nested();
-    for i in current.constraint.iter().flat_map(|c| c.iter()) {
-        let ty = current.ty.c_type();
-
-        writeln!(w, "{indentation}{{")?;
-
-        // TODO: Move to actually specifying the enum value
-        // instead of typecasting integers, for better clarity
-        // of generated code.
+    if let Some(current) = constraints.next() {
+        for i in current.iter() {
+            imm_values.push(i);
+            generate_c_wrapper(w, intrinsic, &mut constraints.clone(), imm_values)?;
+            imm_values.pop();
+        }
+    } else {
         writeln!(
             w,
-            "{body_indentation}const {ty} {} = ({ty}){i};",
-            current.generate_name()
+            "
+{return_ty} {name}_wrapper{imm_arglist}({arglist}) {{
+    return {name}({params});
+}}",
+            return_ty = intrinsic.results.c_type(),
+            name = intrinsic.name,
+            imm_arglist = imm_values
+                .iter()
+                .format_with("", |i, fmt| fmt(&format_args!("_{i}"))),
+            arglist = intrinsic.arguments.as_non_imm_arglist_c(),
+            params = intrinsic.arguments.as_call_params_c(&imm_values)
         )?;
-
-        generate_c_constraint_blocks(
-            w,
-            intrinsic,
-            body_indentation,
-            &mut constraints.clone(),
-            format!("{name}-{i}"),
-        )?;
-
-        writeln!(w, "{indentation}}}")?;
     }
-
     Ok(())
 }
 
-// Compiles C test programs using specified compiler
-pub fn create_c_test_function<T: IntrinsicTypeDefinition>(
+fn create_c_wrapper<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
     intrinsic: &Intrinsic<T>,
 ) -> std::io::Result<()> {
-    let indentation = Indentation::default();
-
-    writeln!(w, "int run_{}() {{", intrinsic.name)?;
-
-    // Define the arrays of arguments.
-    let arguments = &intrinsic.arguments;
-    arguments.gen_arglists_c(w, indentation.nested(), PASSES)?;
-
-    generate_c_constraint_blocks(
+    generate_c_wrapper(
         w,
         intrinsic,
-        indentation.nested(),
-        &mut arguments.iter().rev().filter(|&i| i.has_constraint()),
-        Default::default(),
-    )?;
-
-    writeln!(w, "    return 0;")?;
-    writeln!(w, "}}")?;
-
-    Ok(())
+        &mut intrinsic
+            .arguments
+            .iter()
+            .filter_map(|arg| arg.constraint.as_ref()),
+        &mut Vec::new(),
+    )
 }
 
-pub fn write_mod_cpp<T: IntrinsicTypeDefinition>(
+pub fn write_wrapper_c<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
     notice: &str,
     platform_headers: &[&str],
-    forward_declarations: &str,
     intrinsics: &[Intrinsic<T>],
 ) -> std::io::Result<()> {
     write!(w, "{notice}")?;
 
-    for header in COMMON_HEADERS.iter().chain(platform_headers.iter()) {
-        writeln!(w, "#include <{header}>")?;
-    }
-
-    writeln!(w, "{}", forward_declarations)?;
-
-    for intrinsic in intrinsics {
-        create_c_test_function(w, intrinsic)?;
-    }
-
-    Ok(())
-}
-
-pub fn write_main_cpp<'a>(
-    w: &mut impl std::io::Write,
-    arch_specific_definitions: &str,
-    arch_specific_headers: &[&str],
-    intrinsics: impl Iterator<Item = &'a str> + Clone,
-) -> std::io::Result<()> {
-    for header in COMMON_HEADERS.iter().chain(arch_specific_headers.iter()) {
+    for header in platform_headers {
         writeln!(w, "#include <{header}>")?;
     }
 
-    // NOTE: It's assumed that this value contains the required `ifdef`s.
-    writeln!(w, "{arch_specific_definitions }")?;
-
-    for intrinsic in intrinsics.clone() {
-        writeln!(w, "extern int run_{intrinsic}(void);")?;
-    }
-
-    writeln!(w, "int main(int argc, char **argv) {{")?;
-
     for intrinsic in intrinsics {
-        writeln!(
-            w,
-            "    std::cout << \"{INTRINSIC_DELIMITER}\" << std::endl;"
-        )?;
-        writeln!(w, "    std::cout << \"{intrinsic}\" << std::endl;")?;
-        writeln!(w, "    run_{intrinsic}();\n")?;
+        create_c_wrapper(w, intrinsic)?;
     }
 
-    writeln!(w, "    return 0;")?;
-
-    writeln!(w, "}}")?;
-
     Ok(())
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
index 82b97701bb14a..daf239cbd8d34 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
@@ -1,7 +1,5 @@
 use itertools::Itertools;
-use std::process::Command;
 
-use super::compare::INTRINSIC_DELIMITER;
 use super::indentation::Indentation;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 use crate::common::argument::ArgumentList;
@@ -61,42 +59,6 @@ pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io:
     Ok(())
 }
 
-pub fn write_main_rs<'a>(
-    w: &mut impl std::io::Write,
-    chunk_count: usize,
-    cfg: &str,
-    definitions: &str,
-    intrinsics: impl Iterator<Item = &'a str> + Clone,
-) -> std::io::Result<()> {
-    writeln!(w, "#![feature(simd_ffi)]")?;
-    writeln!(w, "#![feature(f16)]")?;
-    writeln!(w, "#![allow(unused)]")?;
-
-    // Cargo will spam the logs if these warnings are not silenced.
-    writeln!(w, "#![allow(non_upper_case_globals)]")?;
-    writeln!(w, "#![allow(non_camel_case_types)]")?;
-    writeln!(w, "#![allow(non_snake_case)]")?;
-
-    writeln!(w, "{cfg}")?;
-    writeln!(w, "{definitions}")?;
-
-    for module in 0..chunk_count {
-        writeln!(w, "use mod_{module}::*;")?;
-    }
-
-    writeln!(w, "fn main() {{")?;
-
-    for binary in intrinsics {
-        writeln!(w, "    println!(\"{INTRINSIC_DELIMITER}\");")?;
-        writeln!(w, "    println!(\"{binary}\");")?;
-        writeln!(w, "    run_{binary}();\n")?;
-    }
-
-    writeln!(w, "}}")?;
-
-    Ok(())
-}
-
 pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
     notice: &str,
@@ -140,72 +102,6 @@ pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
     Ok(())
 }
 
-pub fn compile_rust_programs(
-    toolchain: Option<&str>,
-    target: &str,
-    profile: &str,
-    linker: Option<&str>,
-) -> bool {
-    /* If there has been a linker explicitly set from the command line then
-     * we want to set it via setting it in the RUSTFLAGS*/
-
-    // This is done because `toolchain` is None when
-    // the --generate-only flag is passed
-    if toolchain.is_none() {
-        return true;
-    }
-
-    trace!("Building cargo command");
-
-    let mut cargo_command = Command::new("cargo");
-    cargo_command.current_dir("rust_programs");
-
-    // Do not use the target directory of the workspace please.
-    cargo_command.env("CARGO_TARGET_DIR", "target");
-
-    if toolchain.is_some_and(|val| !val.is_empty()) {
-        cargo_command.arg(toolchain.unwrap());
-    }
-    cargo_command.args(["build", "--target", target, "--profile", profile]);
-
-    let mut rust_flags = "-Cdebuginfo=0".to_string();
-    if let Some(linker) = linker {
-        rust_flags.push_str(" -C linker=");
-        rust_flags.push_str(linker);
-        rust_flags.push_str(" -C link-args=-static");
-
-        cargo_command.env("CPPFLAGS", "-fuse-ld=lld");
-    }
-
-    cargo_command.env("RUSTFLAGS", rust_flags);
-
-    trace!("running cargo");
-
-    if log::log_enabled!(log::Level::Trace) {
-        cargo_command.stdout(std::process::Stdio::inherit());
-        cargo_command.stderr(std::process::Stdio::inherit());
-    }
-
-    let output = cargo_command.output();
-    trace!("cargo is done");
-
-    if let Ok(output) = output {
-        if output.status.success() {
-            true
-        } else {
-            error!(
-                "Failed to compile code for rust intrinsics\n\nstdout:\n{}\n\nstderr:\n{}",
-                std::str::from_utf8(&output.stdout).unwrap_or(""),
-                std::str::from_utf8(&output.stderr).unwrap_or("")
-            );
-            false
-        }
-    } else {
-        error!("Command failed: {output:#?}");
-        false
-    }
-}
-
 pub fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
     intrinsic: &Intrinsic<T>,
diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
index a14d7ef05f539..f216b55c2f752 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
@@ -5,7 +5,6 @@ use std::str::FromStr;
 
 use itertools::Itertools as _;
 
-use super::cli::Language;
 use super::indentation::Indentation;
 use super::values::value_for_array;
 
@@ -154,67 +153,7 @@ impl IntrinsicType {
         self.ptr
     }
 
-    pub fn c_scalar_type(&self) -> String {
-        match self.kind() {
-            TypeKind::Char(_) => String::from("char"),
-            TypeKind::Vector => String::from("int32_t"),
-            _ => format!(
-                "{prefix}{bits}_t",
-                prefix = self.kind().c_prefix(),
-                bits = self.inner_size()
-            ),
-        }
-    }
-
-    pub fn c_promotion(&self) -> &str {
-        match *self {
-            IntrinsicType {
-                kind,
-                bit_len: Some(8),
-                ..
-            } => match kind {
-                TypeKind::Int(Sign::Signed) => "int",
-                TypeKind::Int(Sign::Unsigned) => "unsigned int",
-                TypeKind::Poly => "uint8_t",
-                _ => "",
-            },
-            IntrinsicType {
-                kind: TypeKind::Poly,
-                bit_len: Some(bit_len),
-                ..
-            } => match bit_len {
-                8 => unreachable!("handled above"),
-                16 => "uint16_t",
-                32 => "uint32_t",
-                64 => "uint64_t",
-                128 => "",
-                _ => panic!("invalid bit_len"),
-            },
-            IntrinsicType {
-                kind: TypeKind::Float,
-                bit_len: Some(bit_len),
-                ..
-            } => match bit_len {
-                16 => "float16_t",
-                32 => "float",
-                64 => "double",
-                128 => "",
-                _ => panic!("invalid bit_len"),
-            },
-            IntrinsicType {
-                kind: TypeKind::Char(_),
-                ..
-            } => "char",
-            _ => "",
-        }
-    }
-
-    pub fn populate_random(
-        &self,
-        indentation: Indentation,
-        loads: u32,
-        language: &Language,
-    ) -> String {
+    pub fn populate_random(&self, indentation: Indentation, loads: u32) -> String {
         match self {
             IntrinsicType {
                 bit_len: Some(bit_len @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 16 | 32 | 64)),
@@ -224,13 +163,9 @@ impl IntrinsicType {
                 vec_len,
                 ..
             } => {
-                let (prefix, suffix) = match language {
-                    Language::Rust => ('[', ']'),
-                    Language::C => ('{', '}'),
-                };
                 let body_indentation = indentation.nested();
                 format!(
-                    "{prefix}\n{body}\n{indentation}{suffix}",
+                    "[\n{body}\n{indentation}]",
                     body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1))
                         .format_with(",\n", |i, fmt| {
                             let src = value_for_array(*bit_len, i);
@@ -241,13 +176,7 @@ impl IntrinsicType {
                                 let mask = !0u64 >> (64 - *bit_len);
                                 let ones_compl = src ^ mask;
                                 let twos_compl = ones_compl + 1;
-                                if (twos_compl == src) && (language == &Language::C) {
-                                    // `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid
-                                    // undefined literal overflow behaviour.
-                                    fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1"))
-                                } else {
-                                    fmt(&format_args!("{body_indentation}-{twos_compl:#x}"))
-                                }
+                                fmt(&format_args!("{body_indentation}-{twos_compl:#x}"))
                             } else {
                                 fmt(&format_args!("{body_indentation}{src:#x}"))
                             }
@@ -261,20 +190,11 @@ impl IntrinsicType {
                 vec_len,
                 ..
             } => {
-                let (prefix, cast_prefix, cast_suffix, suffix) = match (language, bit_len) {
-                    (&Language::Rust, 16) => ('[', "f16::from_bits(", ")", ']'),
-                    (&Language::Rust, 32) => ('[', "f32::from_bits(", ")", ']'),
-                    (&Language::Rust, 64) => ('[', "f64::from_bits(", ")", ']'),
-                    (&Language::C, 16) => ('{', "cast<float16_t, uint16_t>(", ")", '}'),
-                    (&Language::C, 32) => ('{', "cast<float, uint32_t>(", ")", '}'),
-                    (&Language::C, 64) => ('{', "cast<double, uint64_t>(", ")", '}'),
-                    _ => unreachable!(),
-                };
                 format!(
-                    "{prefix}\n{body}\n{indentation}{suffix}",
+                    "[\n{body}\n{indentation}]",
                     body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1))
                         .format_with(",\n", |i, fmt| fmt(&format_args!(
-                            "{indentation}{cast_prefix}{src:#x}{cast_suffix}",
+                            "{indentation}f{bit_len}::from_bits({src:#x})",
                             indentation = indentation.nested(),
                             src = value_for_array(*bit_len, i)
                         )))
@@ -287,14 +207,10 @@ impl IntrinsicType {
                 vec_len,
                 ..
             } => {
-                let (prefix, suffix) = match language {
-                    Language::Rust => ('[', ']'),
-                    Language::C => ('{', '}'),
-                };
                 let body_indentation = indentation.nested();
                 let effective_bit_len = 32;
                 format!(
-                    "{prefix}\n{body}\n{indentation}{suffix}",
+                    "[\n{body}\n{indentation}]",
                     body = (0..(vec_len.unwrap_or(1) * simd_len.unwrap_or(1) + loads - 1))
                         .format_with(",\n", |i, fmt| {
                             let src = value_for_array(effective_bit_len, i);
@@ -304,13 +220,7 @@ impl IntrinsicType {
                                 let mask = !0u64 >> (64 - effective_bit_len);
                                 let ones_compl = src ^ mask;
                                 let twos_compl = ones_compl + 1;
-                                if (twos_compl == src) && (language == &Language::C) {
-                                    // `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid
-                                    // undefined literal overflow behaviour.
-                                    fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1"))
-                                } else {
-                                    fmt(&format_args!("{body_indentation}-{twos_compl:#x}"))
-                                }
+                                fmt(&format_args!("{body_indentation}-{twos_compl:#x}"))
                             } else {
                                 fmt(&format_args!("{body_indentation}{src:#x}"))
                             }
@@ -335,36 +245,17 @@ impl IntrinsicType {
             _ => true,
         }
     }
-
-    pub fn as_call_param_c(&self, name: &String) -> String {
-        if self.ptr {
-            format!("&{name}")
-        } else {
-            name.clone()
-        }
-    }
 }
 
 pub trait IntrinsicTypeDefinition: Deref<Target = IntrinsicType> {
     /// Determines the load function for this type.
     /// can be implemented in an `impl` block
-    fn get_load_function(&self, _language: Language) -> String;
-
-    /// can be implemented in an `impl` block
-    fn get_lane_function(&self) -> String;
+    fn get_load_function(&self) -> String;
 
     /// Gets a string containing the typename for this type in C format.
     /// can be directly defined in `impl` blocks
     fn c_type(&self) -> String;
 
-    /// can be directly defined in `impl` blocks
-    fn c_single_vector_type(&self) -> String;
-
-    /// Generates a std::cout for the intrinsics results that will match the
-    /// rust debug output format for the return type. The generated line assumes
-    /// there is an int i in scope which is the current pass number.
-    fn print_result_c(&self, indentation: Indentation, additional: &str) -> String;
-
     /// Generates a std::cout for the intrinsics results that will match the
     /// rust debug output format for the return type. The generated line assumes
     /// there is an int i in scope which is the current pass number.
@@ -380,13 +271,4 @@ pub trait IntrinsicTypeDefinition: Deref<Target = IntrinsicType> {
             bits = self.inner_size()
         )
     }
-
-    fn generate_final_type_cast(&self) -> String {
-        let type_data = self.c_promotion();
-        if type_data.len() > 2 {
-            format!("({type_data})")
-        } else {
-            String::new()
-        }
-    }
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/common/mod.rs b/library/stdarch/crates/intrinsic-test/src/common/mod.rs
index a1062b3a87dbc..d98592d3b41d0 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/mod.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/mod.rs
@@ -1,24 +1,18 @@
-use std::fs::File;
+use std::{fs::File, io};
 
 use rayon::prelude::*;
 
 use cli::ProcessedCli;
 
 use crate::common::{
-    compile_c::CppCompilation,
-    gen_c::{write_main_cpp, write_mod_cpp},
-    gen_rust::{
-        compile_rust_programs, write_bin_cargo_toml, write_lib_cargo_toml, write_lib_rs,
-        write_main_rs,
-    },
+    gen_c::write_wrapper_c,
+    gen_rust::{write_bin_cargo_toml, write_lib_cargo_toml, write_lib_rs},
     intrinsic::Intrinsic,
     intrinsic_helpers::IntrinsicTypeDefinition,
 };
 
 pub mod argument;
 pub mod cli;
-pub mod compare;
-pub mod compile_c;
 pub mod constraint;
 pub mod gen_c;
 pub mod gen_rust;
@@ -40,94 +34,27 @@ pub trait SupportedArchitectureTest {
     const NOTICE: &str;
 
     const PLATFORM_C_HEADERS: &[&str];
-    const PLATFORM_C_DEFINITIONS: &str;
-    const PLATFORM_C_FORWARD_DECLARATIONS: &str;
 
     const PLATFORM_RUST_CFGS: &str;
     const PLATFORM_RUST_DEFINITIONS: &str;
 
-    fn cpp_compilation(&self) -> Option<CppCompilation>;
-
-    fn build_c_file(&self) -> bool {
+    fn generate_c_file(&self) {
         let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400);
 
-        let cpp_compiler_wrapped = self.cpp_compilation();
-
         std::fs::create_dir_all("c_programs").unwrap();
         self.intrinsics()
             .par_chunks(chunk_size)
             .enumerate()
             .map(|(i, chunk)| {
-                let c_filename = format!("c_programs/mod_{i}.cpp");
+                let c_filename = format!("c_programs/wrapper_{i}.cpp");
                 let mut file = File::create(&c_filename).unwrap();
-                let mod_file_write_result = write_mod_cpp(
-                    &mut file,
-                    Self::NOTICE,
-                    Self::PLATFORM_C_HEADERS,
-                    Self::PLATFORM_C_FORWARD_DECLARATIONS,
-                    chunk,
-                );
-
-                if let Err(error) = mod_file_write_result {
-                    return Err(format!("Error writing to mod_{i}.cpp: {error:?}"));
-                }
-
-                // compile this cpp file into a .o file.
-                //
-                // This is done because `cpp_compiler_wrapped` is None when
-                // the --generate-only flag is passed
-                trace!("compiling mod_{i}.cpp");
-                if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
-                    let compile_output = cpp_compiler
-                        .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))
-                        .map_err(|e| format!("Error compiling mod_{i}.cpp: {e:?}"))?;
-
-                    assert!(
-                        compile_output.status.success(),
-                        "{}",
-                        String::from_utf8_lossy(&compile_output.stderr)
-                    );
-
-                    trace!("finished compiling mod_{i}.cpp");
-                }
-                Ok(())
+                write_wrapper_c(&mut file, Self::NOTICE, Self::PLATFORM_C_HEADERS, chunk)
             })
-            .collect::<Result<(), String>>()
+            .collect::<io::Result<()>>()
             .unwrap();
-
-        let mut file = File::create("c_programs/main.cpp").unwrap();
-        write_main_cpp(
-            &mut file,
-            Self::PLATFORM_C_DEFINITIONS,
-            Self::PLATFORM_C_HEADERS,
-            self.intrinsics().iter().map(|i| i.name.as_str()),
-        )
-        .unwrap();
-
-        // This is done because `cpp_compiler_wrapped` is None when
-        // the --generate-only flag is passed
-        if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
-            // compile this cpp file into a .o file
-            trace!("compiling main.cpp");
-            let output = cpp_compiler
-                .compile_object_file("main.cpp", "intrinsic-test-programs.o")
-                .unwrap();
-            assert!(output.status.success(), "{output:?}");
-
-            let object_files = (0..chunk_count)
-                .map(|i| format!("mod_{i}.o"))
-                .chain(["intrinsic-test-programs.o".to_owned()]);
-
-            let output = cpp_compiler
-                .link_executable(object_files, "intrinsic-test-programs")
-                .unwrap();
-            assert!(output.status.success(), "{output:?}");
-        }
-
-        true
     }
 
-    fn build_rust_file(&self) -> bool {
+    fn generate_rust_file(&self) {
         std::fs::create_dir_all("rust_programs/src").unwrap();
 
         let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400);
@@ -135,21 +62,6 @@ pub trait SupportedArchitectureTest {
         let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
         write_bin_cargo_toml(&mut cargo, chunk_count).unwrap();
 
-        let mut main_rs = File::create("rust_programs/src/main.rs").unwrap();
-        write_main_rs(
-            &mut main_rs,
-            chunk_count,
-            Self::PLATFORM_RUST_CFGS,
-            "",
-            self.intrinsics().iter().map(|i| i.name.as_str()),
-        )
-        .unwrap();
-
-        let target = &self.cli_options().target;
-        let profile = &self.cli_options().profile;
-        let toolchain = self.cli_options().toolchain.as_deref();
-        let linker = self.cli_options().linker.as_deref();
-
         self.intrinsics()
             .par_chunks(chunk_size)
             .enumerate()
@@ -178,27 +90,6 @@ pub trait SupportedArchitectureTest {
             })
             .collect::<Result<(), std::io::Error>>()
             .unwrap();
-
-        compile_rust_programs(toolchain, target, profile, linker)
-    }
-
-    fn compare_outputs(&self) -> bool {
-        if self.cli_options().toolchain.is_some() {
-            let intrinsics_name_list = self
-                .intrinsics()
-                .iter()
-                .map(|i| i.name.clone())
-                .collect::<Vec<_>>();
-
-            compare::compare_outputs(
-                &intrinsics_name_list,
-                &self.cli_options().runner,
-                &self.cli_options().target,
-                &self.cli_options().profile,
-            )
-        } else {
-            true
-        }
     }
 }
 
diff --git a/library/stdarch/crates/intrinsic-test/src/main.rs b/library/stdarch/crates/intrinsic-test/src/main.rs
index e5c846877ce6d..9f57c99f12cf5 100644
--- a/library/stdarch/crates/intrinsic-test/src/main.rs
+++ b/library/stdarch/crates/intrinsic-test/src/main.rs
@@ -15,27 +15,21 @@ fn main() {
     let args: Cli = clap::Parser::parse();
     let processed_cli_options = ProcessedCli::new(args);
 
-    match processed_cli_options.target.as_str() {
-        "aarch64-unknown-linux-gnu"
-        | "armv7-unknown-linux-gnueabihf"
-        | "aarch64_be-unknown-linux-gnu" => run(ArmArchitectureTest::create(processed_cli_options)),
-
-        "x86_64-unknown-linux-gnu" => run(X86ArchitectureTest::create(processed_cli_options)),
-        _ => std::process::exit(0),
+    if processed_cli_options.target.starts_with("arm")
+        | processed_cli_options.target.starts_with("aarch64")
+    {
+        run(ArmArchitectureTest::create(processed_cli_options))
+    } else if processed_cli_options.target.starts_with("x86") {
+        run(X86ArchitectureTest::create(processed_cli_options))
+    } else {
+        unimplemented!("Unsupported target {}", processed_cli_options.target)
     }
 }
 
 fn run(test_environment: impl SupportedArchitectureTest) {
     info!("building C binaries");
-    if !test_environment.build_c_file() {
-        std::process::exit(2);
-    }
+    test_environment.generate_c_file();
+
     info!("building Rust binaries");
-    if !test_environment.build_rust_file() {
-        std::process::exit(3);
-    }
-    info!("Running binaries");
-    if !test_environment.compare_outputs() {
-        std::process::exit(1);
-    }
+    test_environment.generate_rust_file();
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/x86/compile.rs b/library/stdarch/crates/intrinsic-test/src/x86/compile.rs
deleted file mode 100644
index 65cd291b1b377..0000000000000
--- a/library/stdarch/crates/intrinsic-test/src/x86/compile.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-use crate::common::cli::ProcessedCli;
-use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation};
-
-pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
-    let cpp_compiler = config.cpp_compiler.as_ref()?;
-
-    // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
-    let mut command = CompilationCommandBuilder::new()
-        .add_arch_flags(["icelake-client"])
-        .set_compiler(cpp_compiler)
-        .set_target(&config.target)
-        .set_opt_level("2")
-        .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref())
-        .set_project_root("c_programs")
-        .add_extra_flags(vec![
-            "-ffp-contract=off",
-            "-Wno-narrowing",
-            "-mavx",
-            "-mavx2",
-            "-mavx512f",
-            "-msse2",
-            "-mavx512vl",
-            "-mavx512bw",
-            "-mavx512dq",
-            "-mavx512cd",
-            "-mavx512fp16",
-            "-msha512",
-            "-msm3",
-            "-msm4",
-            "-mavxvnni",
-            "-mavxvnniint8",
-            "-mavxneconvert",
-            "-mavxifma",
-            "-mavxvnniint16",
-            "-mavx512bf16",
-            "-mavx512bitalg",
-            "-mavx512ifma",
-            "-mavx512vbmi",
-            "-mavx512vbmi2",
-            "-mavx512vnni",
-            "-mavx512vpopcntdq",
-            "-mavx512vp2intersect",
-            "-mbmi",
-            "-mbmi2",
-            "-mgfni",
-            "-mvaes",
-            "-mvpclmulqdq",
-            "-ferror-limit=1000",
-            "-std=c++23",
-        ]);
-
-    if !cpp_compiler.contains("clang") {
-        command = command.add_extra_flag("-flax-vector-conversions");
-    }
-
-    let cpp_compiler = command.into_cpp_compilation();
-
-    Some(cpp_compiler)
-}
diff --git a/library/stdarch/crates/intrinsic-test/src/x86/config.rs b/library/stdarch/crates/intrinsic-test/src/x86/config.rs
index 491dbb5147bbc..e1a006fd9fcde 100644
--- a/library/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/library/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -129,195 +129,6 @@ unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 {
     _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr))
 }
 
-#[inline]
-fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
-    formatter: &mut core::fmt::Formatter<'_>,
-    type_name: &str,
-    array: &[T; N],
-) -> core::fmt::Result {
-    core::fmt::Formatter::debug_tuple_fields_finish(
-        formatter,
-        type_name,
-        &core::array::from_fn::<&dyn core::fmt::Debug, N, _>(|i| &array[i]),
-    )
-}
-
-trait DebugAs<T> {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
-}
-
-impl<T: core::fmt::Display> DebugAs<T> for T {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        write!(f, "{self}")
-    }
-}
-
-macro_rules! impl_debug_as {
-    ($simd:ty, $name:expr, $bits:expr, [$($type:ty),+]) => {
-        $(
-            impl DebugAs<$type> for $simd {
-                fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-                    const ELEMENT_BITS: usize = core::mem::size_of::<$type>() * 8;
-                    const NUM_ELEMENTS: usize = $bits / ELEMENT_BITS;
-                    let array = unsafe { core::mem::transmute::<_, [$type; NUM_ELEMENTS]>(*self) };
-                    debug_simd_finish(f, $name, &array)
-                }
-            }
-        )+
-    };
-}
-
-impl_debug_as!(__m128i, "__m128i", 128, [u8, i8, u16, i16, u32, i32, u64, i64, f16]);
-impl_debug_as!(__m256i, "__m256i", 256, [u8, i8, u16, i16, u32, i32, u64, i64]);
-impl_debug_as!(__m512i, "__m512i", 512, [u8, i8, u16, i16, u32, i32, u64, i64]);
-impl_debug_as!(__m128h, "__m128h", 128, [f32]);
-impl_debug_as!(__m256h, "__m256h", 256, [f32]);
-impl_debug_as!(__m512h, "__m512h", 512, [f32]);
-
-fn debug_as<V, T>(x: V) -> impl core::fmt::Debug 
-where V: DebugAs<T>
-{
-    struct DebugWrapper<V, T>(V, core::marker::PhantomData<T>);
-    impl<V: DebugAs<T>, T> core::fmt::Debug for DebugWrapper<V, T> {
-        fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-            self.0.fmt(f)
-        }
-    }
-    DebugWrapper(x, core::marker::PhantomData)
-}
-
-"#;
-
-pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#"
-#ifndef X86_DECLARATIONS
-#define X86_DECLARATIONS
-    typedef _Float16 float16_t;
-    typedef float float32_t;
-    typedef double float64_t;
-    
-    #define __int64 long long
-    #define __int32 int
-
-    std::ostream& operator<<(std::ostream& os, _Float16 value);
-    std::ostream& operator<<(std::ostream& os, __m128i value);
-    std::ostream& operator<<(std::ostream& os, __m256i value);
-    std::ostream& operator<<(std::ostream& os, __m512i value);
-    std::ostream& operator<<(std::ostream& os, __mmask8 value);
-    
-    #define _mm512_extract_intrinsic_test_epi8(m, lane) \
-        _mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)
-    
-    #define _mm512_extract_intrinsic_test_epi16(m, lane) \
-        _mm_extract_epi16(_mm512_extracti64x2_epi64((m), (lane) / 8), (lane) % 8)
-    
-    #define _mm512_extract_intrinsic_test_epi32(m, lane) \
-        _mm_extract_epi32(_mm512_extracti64x2_epi64((m), (lane) / 4), (lane) % 4)
-    
-    #define _mm512_extract_intrinsic_test_epi64(m, lane) \
-        _mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2)
-        
-    // Load f16 (__m128h) and cast to integer (__m128i)
-    #define _mm_loadu_ph_to___m128i(mem_addr) _mm_castph_si128(_mm_loadu_ph(mem_addr))
-    #define _mm256_loadu_ph_to___m256i(mem_addr) _mm256_castph_si256(_mm256_loadu_ph(mem_addr))
-    #define _mm512_loadu_ph_to___m512i(mem_addr) _mm512_castph_si512(_mm512_loadu_ph(mem_addr))
-    
-    // Load f32 (__m128) and cast to f16 (__m128h)
-    #define _mm_loadu_ps_to___m128h(mem_addr) _mm_castps_ph(_mm_loadu_ps(mem_addr))
-    #define _mm256_loadu_ps_to___m256h(mem_addr) _mm256_castps_ph(_mm256_loadu_ps(mem_addr))
-    #define _mm512_loadu_ps_to___m512h(mem_addr) _mm512_castps_ph(_mm512_loadu_ps(mem_addr))
-    
-    // Load integer types and cast to double (__m128d, __m256d, __m512d)
-    #define _mm_loadu_epi16_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
-    #define _mm256_loadu_epi16_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
-    #define _mm512_loadu_epi16_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
-    
-    #define _mm_loadu_epi32_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
-    #define _mm256_loadu_epi32_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
-    #define _mm512_loadu_epi32_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
-    
-    #define _mm_loadu_epi64_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
-    #define _mm256_loadu_epi64_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
-    #define _mm512_loadu_epi64_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
-    
-    // Load integer types and cast to float (__m128, __m256, __m512)
-    #define _mm_loadu_epi16_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr)))
-    #define _mm256_loadu_epi16_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr)))
-    #define _mm512_loadu_epi16_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr)))
-    
-    #define _mm_loadu_epi32_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr)))
-    #define _mm256_loadu_epi32_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr)))
-    #define _mm512_loadu_epi32_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr)))
-    
-    #define _mm_loadu_epi64_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr)))
-    #define _mm256_loadu_epi64_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr)))
-    #define _mm512_loadu_epi64_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr)))
-    
-    // T1 is the `To` type, T2 is the `From` type
-    template<typename T1, typename T2> T1 cast(T2 x) {
-      if constexpr ((std::is_integral_v<T1> && std::is_integral_v<T2>) || (std::is_floating_point_v<T1> && std::is_floating_point_v<T2>)) {
-          return x;
-      } else if constexpr (sizeof(T1) <= sizeof(T2)) {
-        T1 ret{};
-        std::memcpy(&ret, &x, sizeof(T1));
-        return ret;
-      } else {
-        static_assert(sizeof(T1) == sizeof(T2) || std::is_convertible_v<T2, T1>,
-                              "T2 must either be convertible to T1, or have the same size as T1!");
-        return T1{};
-      }
-    }
-#endif
-"#;
-pub const PLATFORM_C_DEFINITIONS: &str = r#"
-
-std::ostream& operator<<(std::ostream& os, _Float16 value) {
-    os << static_cast<float>(value);
-    return os;
-}
-
-std::ostream& operator<<(std::ostream& os, __m128i value) {
-    void* temp = malloc(sizeof(__m128i));
-    _mm_storeu_si128((__m128i*)temp, value);
-    std::stringstream ss;
-    
-    ss << "0x";
-    for(int i = 0; i < 16; i++) {
-        ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i];
-    }
-    os << ss.str();
-    return os;
-}
-
-std::ostream& operator<<(std::ostream& os, __m256i value) {
-    void* temp = malloc(sizeof(__m256i));
-    _mm256_storeu_si256((__m256i*)temp, value);
-    std::stringstream ss;
-    
-    ss << "0x";
-    for(int i = 0; i < 32; i++) {
-        ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i];
-    }
-    os << ss.str();
-    return os;
-}
-
-std::ostream& operator<<(std::ostream& os, __m512i value) {
-    void* temp = malloc(sizeof(__m512i));
-    _mm512_storeu_si512((__m512i*)temp, value);
-    std::stringstream ss;
-    
-    ss << "0x";
-    for(int i = 0; i < 64; i++) {
-        ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i];
-    }
-    os << ss.str();
-    return os;
-}
-
-std::ostream& operator<<(std::ostream& os, __mmask8 value) {
-    os << static_cast<int>(value);
-    return os;
-}
 "#;
 
 pub const PLATFORM_RUST_CFGS: &str = r#"
diff --git a/library/stdarch/crates/intrinsic-test/src/x86/mod.rs b/library/stdarch/crates/intrinsic-test/src/x86/mod.rs
index f2baf070714c1..0e2998f1a3960 100644
--- a/library/stdarch/crates/intrinsic-test/src/x86/mod.rs
+++ b/library/stdarch/crates/intrinsic-test/src/x86/mod.rs
@@ -1,4 +1,3 @@
-mod compile;
 mod config;
 mod constraint;
 mod intrinsic;
@@ -7,7 +6,6 @@ mod xml_parser;
 
 use crate::common::SupportedArchitectureTest;
 use crate::common::cli::ProcessedCli;
-use crate::common::compile_c::CppCompilation;
 use crate::common::intrinsic::Intrinsic;
 use crate::common::intrinsic_helpers::TypeKind;
 use intrinsic::X86IntrinsicType;
@@ -29,15 +27,9 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
         &self.intrinsics
     }
 
-    fn cpp_compilation(&self) -> Option<CppCompilation> {
-        compile::build_cpp_compilation(&self.cli_options)
-    }
-
     const NOTICE: &str = config::NOTICE;
 
     const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h", "cstddef", "cstdint"];
-    const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS;
-    const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS;
 
     const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS;
     const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS;
diff --git a/library/stdarch/crates/intrinsic-test/src/x86/types.rs b/library/stdarch/crates/intrinsic-test/src/x86/types.rs
index 2391ee9c2df49..83581c0dac6ab 100644
--- a/library/stdarch/crates/intrinsic-test/src/x86/types.rs
+++ b/library/stdarch/crates/intrinsic-test/src/x86/types.rs
@@ -1,11 +1,8 @@
 use std::str::FromStr;
 
 use itertools::Itertools;
-use regex::Regex;
 
 use super::intrinsic::X86IntrinsicType;
-use crate::common::cli::Language;
-use crate::common::indentation::Indentation;
 use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
 use crate::x86::xml_parser::Parameter;
 
@@ -26,82 +23,8 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
             .replace("const ", "")
     }
 
-    fn c_single_vector_type(&self) -> String {
-        // matches __m128, __m256 and similar types
-        let re = Regex::new(r"__m\d+").unwrap();
-        if re.is_match(self.param.type_data.as_str()) {
-            self.param.type_data.clone()
-        } else {
-            unreachable!("Shouldn't be called on this type")
-        }
-    }
-
-    // fn rust_type(&self) -> String {
-    //     // handling edge cases first
-    //     // the general handling is implemented below
-    //     if let Some(val) = self.metadata.get("type") {
-    //         match val.as_str() {
-    //             "__m128 const *" => {
-    //                 return "&__m128".to_string();
-    //             }
-    //             "__m128d const *" => {
-    //                 return "&__m128d".to_string();
-    //             }
-    //             "const void*" => {
-    //                 return "&__m128d".to_string();
-    //             }
-    //             _ => {}
-    //         }
-    //     }
-
-    //     if self.kind() == TypeKind::Void && self.ptr {
-    //         // this has been handled by default settings in
-    //         // the from_param function of X86IntrinsicType
-    //         unreachable!()
-    //     }
-
-    //     // general handling cases
-    //     let core_part = if self.kind() == TypeKind::Mask {
-    //         // all types of __mmask<int> are handled here
-    //         format!("__mask{}", self.bit_len.unwrap())
-    //     } else if self.simd_len.is_some() {
-    //         // all types of __m<int> vector types are handled here
-    //         let re = Regex::new(r"\__m\d+[a-z]*").unwrap();
-    //         let rust_type = self
-    //             .metadata
-    //             .get("type")
-    //             .map(|val| re.find(val).unwrap().as_str());
-    //         rust_type.unwrap().to_string()
-    //     } else {
-    //         format!(
-    //             "{}{}",
-    //             self.kind.rust_prefix().to_string(),
-    //             self.bit_len.unwrap()
-    //         )
-    //     };
-
-    //     // extracting "memsize" so that even vector types can be involved
-    //     let memwidth = self
-    //         .metadata
-    //         .get("memwidth")
-    //         .map(|n| str::parse::<u32>(n).unwrap());
-    //     let prefix_part = if self.ptr && self.constant && self.bit_len.eq(&memwidth) {
-    //         "&"
-    //     } else if self.ptr && self.bit_len.eq(&memwidth) {
-    //         "&mut "
-    //     } else if self.ptr && self.constant {
-    //         "*const "
-    //     } else if self.ptr {
-    //         "*mut "
-    //     } else {
-    //         ""
-    //     };
-
-    //     return prefix_part.to_string() + core_part.as_str();
-    // }
-
     /// Determines the load function for this type.
-    fn get_load_function(&self, _language: Language) -> String {
+    fn get_load_function(&self) -> String {
         let type_value = self.param.type_data.clone();
         if type_value.len() == 0 {
             unimplemented!("the value for key 'type' is not present!");
@@ -168,82 +91,6 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
         }
     }
 
-    /// Generates a std::cout for the intrinsics results that will match the
-    /// rust debug output format for the return type. The generated line assumes
-    /// there is an int i in scope which is the current pass number.
-    fn print_result_c(&self, indentation: Indentation, additional: &str) -> String {
-        let lanes = if self.num_lanes() > 1 {
-            (0..self.num_lanes())
-                .map(|idx| -> std::string::String {
-                    let cast_type = self.c_promotion();
-                    let lane_fn = self.get_lane_function();
-                    if cast_type.len() > 2 {
-                        format!("cast<{cast_type}>({lane_fn}(__return_value, {idx}))")
-                    } else {
-                        format!("{lane_fn}(__return_value, {idx})")
-                    }
-                })
-                .collect::<Vec<_>>()
-                .join(r#" << ", " << "#)
-        } else {
-            format!(
-                "{promote}cast<{cast}>(__return_value)",
-                cast = match self.kind() {
-                    TypeKind::Void => "void".to_string(),
-                    TypeKind::Float if self.inner_size() == 64 => "double".to_string(),
-                    TypeKind::Float if self.inner_size() == 32 => "float".to_string(),
-                    TypeKind::Mask => format!(
-                        "__mmask{}",
-                        self.bit_len.expect(format!("self: {self:#?}").as_str())
-                    ),
-                    TypeKind::Vector => format!(
-                        "__m{}i",
-                        self.bit_len.expect(format!("self: {self:#?}").as_str())
-                    ),
-                    _ => self.c_scalar_type(),
-                },
-                promote = self.generate_final_type_cast(),
-            )
-        };
-
-        format!(
-            r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) <<  {lanes} << "{close}" << std::endl;"#,
-            ty = if self.is_simd() {
-                format!("{}(", self.c_type())
-            } else {
-                String::from("")
-            },
-            close = if self.is_simd() { ")" } else { "" },
-        )
-    }
-
-    /// Determines the get lane function for this type.
-    fn get_lane_function(&self) -> String {
-        let total_vector_bits: Option<u32> = self
-            .simd_len
-            .zip(self.bit_len)
-            .and_then(|(simd_len, bit_len)| Some(simd_len * bit_len));
-
-        match (self.bit_len, total_vector_bits) {
-            (Some(8), Some(128)) => String::from("(uint8_t)_mm_extract_epi8"),
-            (Some(16), Some(128)) => String::from("(uint16_t)_mm_extract_epi16"),
-            (Some(32), Some(128)) => String::from("(uint32_t)_mm_extract_epi32"),
-            (Some(64), Some(128)) => String::from("(uint64_t)_mm_extract_epi64"),
-            (Some(8), Some(256)) => String::from("(uint8_t)_mm256_extract_epi8"),
-            (Some(16), Some(256)) => String::from("(uint16_t)_mm256_extract_epi16"),
-            (Some(32), Some(256)) => String::from("(uint32_t)_mm256_extract_epi32"),
-            (Some(64), Some(256)) => String::from("(uint64_t)_mm256_extract_epi64"),
-            (Some(8), Some(512)) => String::from("(uint8_t)_mm512_extract_intrinsic_test_epi8"),
-            (Some(16), Some(512)) => String::from("(uint16_t)_mm512_extract_intrinsic_test_epi16"),
-            (Some(32), Some(512)) => String::from("(uint32_t)_mm512_extract_intrinsic_test_epi32"),
-            (Some(64), Some(512)) => String::from("(uint64_t)_mm512_extract_intrinsic_test_epi64"),
-            _ => unreachable!(
-                "invalid length for vector argument: {:?}, {:?}",
-                self.bit_len, self.simd_len
-            ),
-        }
-    }
-
     fn rust_scalar_type(&self) -> String {
         let prefix = match self.data.kind {
             TypeKind::Mask => String::from("__mmask"),

From a6c9e823294141ad655eb447f50b80422e4230b9 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Tue, 12 May 2026 09:14:34 +0200
Subject: [PATCH 52/78] zeroall/zeroupper *are* supported by Miri

---
 library/stdarch/crates/core_arch/src/x86/avx.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs
index 74fc2db13dcdc..37fed5fe08fc7 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx.rs
@@ -1097,6 +1097,9 @@ pub const fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
 
 /// Zeroes the contents of all XMM or YMM registers.
 ///
+/// This operation is purely a performance hint for the CPU and has no effect on the Abstract
+/// Machine state.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
 #[inline]
 #[target_feature(enable = "avx")]
@@ -1109,6 +1112,9 @@ pub fn _mm256_zeroall() {
 /// Zeroes the upper 128 bits of all YMM registers;
 /// the lower 128-bits of the registers are unmodified.
 ///
+/// This operation is purely a performance hint for the CPU and has no effect on the Abstract
+/// Machine state.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper)
 #[inline]
 #[target_feature(enable = "avx")]
@@ -4007,13 +4013,11 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri
     fn test_mm256_zeroall() {
         _mm256_zeroall();
     }
 
     #[simd_test(enable = "avx")]
-    #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri
     fn test_mm256_zeroupper() {
         _mm256_zeroupper();
     }

From 3c7dee460c61870f780c8069319f38fe3d71e04c Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Tue, 12 May 2026 09:14:53 +0200
Subject: [PATCH 53/78] Update/add comments for miri-ignored tests

---
 library/stdarch/crates/core_arch/src/x86/avx.rs     | 6 +++---
 library/stdarch/crates/core_arch/src/x86/avx512f.rs | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs
index 37fed5fe08fc7..ef434205b52a0 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx.rs
@@ -4488,7 +4488,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
     fn test_mm256_stream_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let mut r = _mm256_undefined_si256();
@@ -4500,7 +4500,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
     fn test_mm256_stream_pd() {
         #[repr(align(32))]
         struct Memory {
@@ -4519,7 +4519,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
     fn test_mm256_stream_ps() {
         #[repr(align(32))]
         struct Memory {
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
index 94c4269c8fe51..66ea63b674f12 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
@@ -58246,7 +58246,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    #[cfg_attr(miri, ignore)]
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
     fn test_mm512_stream_ps() {
         #[repr(align(64))]
         struct Memory {
@@ -58265,7 +58265,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    #[cfg_attr(miri, ignore)]
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
     fn test_mm512_stream_pd() {
         #[repr(align(64))]
         struct Memory {
@@ -58284,7 +58284,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    #[cfg_attr(miri, ignore)]
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
     fn test_mm512_stream_si512() {
         #[repr(align(64))]
         struct Memory {

From e91ca04c54a1bd1ae9fb8532878dec0315850e37 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Tue, 12 May 2026 09:53:34 +0200
Subject: [PATCH 54/78] explain ignore aarch64 tests

---
 library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs    | 4 ++--
 library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index 7ce79671f7e4e..c66702814cfb2 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -789,7 +789,7 @@ mod tests {
     macro_rules! wide_store_load_roundtrip_fp16 {
         ($( $name:ident $args:tt);* $(;)?) => {
             $(
-                #[cfg_attr(miri, ignore)]
+                #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
                 #[simd_test(enable = "neon,fp16")]
                 #[cfg(not(target_arch = "arm64ec"))]
                 unsafe fn $name() {
@@ -1060,7 +1060,7 @@ mod tests {
     macro_rules! lane_wide_store_load_roundtrip_neon {
         ($( $name:ident $args:tt);* $(;)?) => {
             $(
-                #[cfg_attr(miri, ignore)]
+                #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
                 #[simd_test(enable = "neon")]
                 unsafe fn $name() {
                     lane_wide_store_load_roundtrip! $args;
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
index 12d3c090ed9fa..4cc7f64f2dc29 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs
@@ -5248,11 +5248,10 @@ mod tests {
         };
     }
 
-    // Most of these are implemented with builtins, which miri can't handle
     macro_rules! lane_wide_store_load_roundtrip_neon {
         ($( $name:ident $args:tt);* $(;)?) => {
             $(
-                #[cfg_attr(miri, ignore)]
+                #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
                 #[simd_test(enable = "neon")]
                 unsafe fn $name() {
                     lane_wide_store_load_roundtrip! $args;
@@ -5264,7 +5263,7 @@ mod tests {
     macro_rules! lane_wide_store_load_roundtrip_fp16 {
         ($( $name:ident $args:tt);* $(;)?) => {
             $(
-                #[cfg_attr(miri, ignore)]
+                #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
                 #[simd_test(enable = "neon,fp16")]
                 #[cfg(not(target_arch = "arm64ec"))]
                 unsafe fn $name() {

From 35d77fbdd0c67756c63eceb6f41238db64f3fedf Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui@loongson.cn>
Date: Thu, 7 May 2026 15:37:21 +0800
Subject: [PATCH 55/78] loongarch: Use `intrinsics::simd` for vpick{ev,od}

---
 .../src/loongarch64/lasx/generated.rs         | 72 -------------------
 .../src/loongarch64/lasx/portable.rs          | 70 ++++++++++++++++++
 .../src/loongarch64/lsx/generated.rs          | 72 -------------------
 .../core_arch/src/loongarch64/lsx/portable.rs | 56 +++++++++++++++
 .../crates/stdarch-gen-loongarch/lasx.spec    |  8 +++
 .../crates/stdarch-gen-loongarch/lsx.spec     |  8 +++
 .../src/portable-intrinsics.txt               | 16 +++++
 7 files changed, 158 insertions(+), 144 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
index f8667887c74c1..e05e19457319d 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
@@ -155,22 +155,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvrepl128vei_w(a: __v8i32, b: u32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvrepl128vei.d"]
     fn __lasx_xvrepl128vei_d(a: __v4i64, b: u32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvpickev.b"]
-    fn __lasx_xvpickev_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvpickev.h"]
-    fn __lasx_xvpickev_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvpickev.w"]
-    fn __lasx_xvpickev_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvpickev.d"]
-    fn __lasx_xvpickev_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvpickod.b"]
-    fn __lasx_xvpickod_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvpickod.h"]
-    fn __lasx_xvpickod_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvpickod.w"]
-    fn __lasx_xvpickod_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvpickod.d"]
-    fn __lasx_xvpickod_d(a: __v4i64, b: __v4i64) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvilvh.b"]
     fn __lasx_xvilvh_b(a: __v32i8, b: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvilvh.h"]
@@ -1653,62 +1637,6 @@ pub fn lasx_xvrepl128vei_d<const IMM1: u32>(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvrepl128vei_d(transmute(a), IMM1)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickev_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpickev_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickev_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpickev_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickev_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpickev_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickev_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpickev_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickod_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpickod_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickod_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpickod_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickod_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpickod_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickod_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpickod_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
index 89535582101fc..1d44f418bfbcd 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs
@@ -5,6 +5,68 @@ use crate::core_arch::simd::{self as cs, *};
 use crate::intrinsics::simd as is;
 use crate::mem::transmute;
 
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickev_b<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(
+        b,
+        a,
+        [
+            0, 2, 4, 6, 8, 10, 12, 14, 32, 34, 36, 38, 40, 42, 44, 46,
+            16, 18, 20, 22, 24, 26, 28, 30, 48, 50, 52, 54, 56, 58, 60, 62
+        ]
+    )
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickev_d<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [0, 4, 2, 6])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickev_w<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [0, 2, 8, 10, 4, 6, 12, 14])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickev_h<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickod_b<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(
+        b,
+        a,
+        [
+            1, 3, 5, 7, 9, 11, 13, 15, 33, 35, 37, 39, 41, 43, 45, 47,
+            17, 19, 21, 23, 25, 27, 29, 31, 49, 51, 53, 55, 57, 59, 61, 63
+        ]
+    )
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickod_d<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [1, 5, 3, 7])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickod_w<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [1, 3, 9, 11, 5, 7, 13, 15])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickod_h<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31])
+}
+
 impl_vv!("lasx", lasx_xvpcnt_b, is::simd_ctpop, m256i, i8x32);
 impl_vv!("lasx", lasx_xvpcnt_h, is::simd_ctpop, m256i, i16x16);
 impl_vv!("lasx", lasx_xvpcnt_w, is::simd_ctpop, m256i, i32x8);
@@ -160,6 +222,14 @@ impl_vvv!("lasx", lasx_xvabsd_bu, ls::simd_absd, m256i, u8x32);
 impl_vvv!("lasx", lasx_xvabsd_hu, ls::simd_absd, m256i, u16x16);
 impl_vvv!("lasx", lasx_xvabsd_wu, ls::simd_absd, m256i, u32x8);
 impl_vvv!("lasx", lasx_xvabsd_du, ls::simd_absd, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvpickev_b, simd_pickev_b, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvpickev_h, simd_pickev_h, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvpickev_w, simd_pickev_w, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvpickev_d, simd_pickev_d, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvpickod_b, simd_pickod_b, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvpickod_h, simd_pickod_h, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvpickod_w, simd_pickod_w, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvpickod_d, simd_pickod_d, m256i, i64x4);
 
 impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32);
 impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16);
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
index 41ced43454273..767be195292f2 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
@@ -163,22 +163,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vreplvei_w(a: __v4i32, b: u32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vreplvei.d"]
     fn __lsx_vreplvei_d(a: __v2i64, b: u32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vpickev.b"]
-    fn __lsx_vpickev_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vpickev.h"]
-    fn __lsx_vpickev_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vpickev.w"]
-    fn __lsx_vpickev_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vpickev.d"]
-    fn __lsx_vpickev_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vpickod.b"]
-    fn __lsx_vpickod_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vpickod.h"]
-    fn __lsx_vpickod_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vpickod.w"]
-    fn __lsx_vpickod_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vpickod.d"]
-    fn __lsx_vpickod_d(a: __v2i64, b: __v2i64) -> __v2i64;
     #[link_name = "llvm.loongarch.lsx.vilvh.b"]
     fn __lsx_vilvh_b(a: __v16i8, b: __v16i8) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vilvh.h"]
@@ -1593,62 +1577,6 @@ pub fn lsx_vreplvei_d<const IMM1: u32>(a: m128i) -> m128i {
     unsafe { transmute(__lsx_vreplvei_d(transmute(a), IMM1)) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickev_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickev_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickev_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickev_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickev_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickev_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickev_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickev_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickod_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickod_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickod_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickod_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickod_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickod_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickod_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickod_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
index 16f00582357b0..24f9af851d8c1 100644
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs
@@ -5,6 +5,54 @@ use crate::core_arch::simd::{self as cs, *};
 use crate::intrinsics::simd as is;
 use crate::mem::transmute;
 
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickev_b<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickev_h<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [0, 2, 4, 6, 8, 10, 12, 14])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickev_w<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [0, 2, 4, 6])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickev_d<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [0, 2])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickod_b<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickod_h<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [1, 3, 5, 7, 9, 11, 13, 15])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickod_w<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [1, 3, 5, 7])
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+const unsafe fn simd_pickod_d<T: Copy>(a: T, b: T) -> T {
+    simd_shuffle!(b, a, [1, 3])
+}
+
 impl_vv!("lsx", lsx_vpcnt_b, is::simd_ctpop, m128i, i8x16);
 impl_vv!("lsx", lsx_vpcnt_h, is::simd_ctpop, m128i, i16x8);
 impl_vv!("lsx", lsx_vpcnt_w, is::simd_ctpop, m128i, i32x4);
@@ -160,6 +208,14 @@ impl_vvv!("lsx", lsx_vabsd_bu, ls::simd_absd, m128i, u8x16);
 impl_vvv!("lsx", lsx_vabsd_hu, ls::simd_absd, m128i, u16x8);
 impl_vvv!("lsx", lsx_vabsd_wu, ls::simd_absd, m128i, u32x4);
 impl_vvv!("lsx", lsx_vabsd_du, ls::simd_absd, m128i, u64x2);
+impl_vvv!("lsx", lsx_vpickev_b, simd_pickev_b, m128i, i8x16);
+impl_vvv!("lsx", lsx_vpickev_h, simd_pickev_h, m128i, i16x8);
+impl_vvv!("lsx", lsx_vpickev_w, simd_pickev_w, m128i, i32x4);
+impl_vvv!("lsx", lsx_vpickev_d, simd_pickev_d, m128i, i64x2);
+impl_vvv!("lsx", lsx_vpickod_b, simd_pickod_b, m128i, i8x16);
+impl_vvv!("lsx", lsx_vpickod_h, simd_pickod_h, m128i, i16x8);
+impl_vvv!("lsx", lsx_vpickod_w, simd_pickod_w, m128i, i32x4);
+impl_vvv!("lsx", lsx_vpickod_d, simd_pickod_d, m128i, i64x2);
 
 impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16);
 impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8);
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
index ae69f194103dd..867e071b62edc 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
+++ b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec
@@ -1420,41 +1420,49 @@ asm-fmts = xd, xj, ui1
 data-types = V4DI, V4DI, UQI
 
 /// lasx_xvpickev_b
+impl = portable
 name = lasx_xvpickev_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvpickev_h
+impl = portable
 name = lasx_xvpickev_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvpickev_w
+impl = portable
 name = lasx_xvpickev_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvpickev_d
+impl = portable
 name = lasx_xvpickev_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvpickod_b
+impl = portable
 name = lasx_xvpickod_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvpickod_h
+impl = portable
 name = lasx_xvpickod_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvpickod_w
+impl = portable
 name = lasx_xvpickod_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvpickod_d
+impl = portable
 name = lasx_xvpickod_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
index 48e98d59b602c..b9df7bd96b9cb 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
+++ b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec
@@ -1440,41 +1440,49 @@ asm-fmts = vd, vj, ui1
 data-types = V2DI, V2DI, UQI
 
 /// lsx_vpickev_b
+impl = portable
 name = lsx_vpickev_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vpickev_h
+impl = portable
 name = lsx_vpickev_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vpickev_w
+impl = portable
 name = lsx_vpickev_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vpickev_d
+impl = portable
 name = lsx_vpickev_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vpickod_b
+impl = portable
 name = lsx_vpickod_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vpickod_h
+impl = portable
 name = lsx_vpickod_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vpickod_w
+impl = portable
 name = lsx_vpickod_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vpickod_d
+impl = portable
 name = lsx_vpickod_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
diff --git a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
index e9b7b04f27235..e07ac41f9c44e 100644
--- a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
+++ b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
@@ -231,6 +231,14 @@ lsx_vabsd_bu
 lsx_vabsd_hu
 lsx_vabsd_wu
 lsx_vabsd_du
+lsx_vpickev_b
+lsx_vpickev_h
+lsx_vpickev_w
+lsx_vpickev_d
+lsx_vpickod_b
+lsx_vpickod_h
+lsx_vpickod_w
+lsx_vpickod_d
 
 # LASX intrinsics
 lasx_xvsll_b
@@ -459,3 +467,11 @@ lasx_xvabsd_bu
 lasx_xvabsd_hu
 lasx_xvabsd_wu
 lasx_xvabsd_du
+lasx_xvpickev_b
+lasx_xvpickev_h
+lasx_xvpickev_w
+lasx_xvpickev_d
+lasx_xvpickod_b
+lasx_xvpickod_h
+lasx_xvpickod_w
+lasx_xvpickod_d

From ee0c01647d9c616cb071a529f6bc8ab86aad23c5 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Thu, 30 Apr 2026 06:30:03 +0530
Subject: [PATCH 56/78] Generate rust bindings and test code

---
 .../crates/intrinsic-test/src/arm/config.rs   |   1 -
 .../crates/intrinsic-test/src/arm/mod.rs      |  14 +-
 .../crates/intrinsic-test/src/arm/types.rs    |  20 +-
 .../intrinsic-test/src/common/argument.rs     |  12 +-
 .../crates/intrinsic-test/src/common/gen_c.rs |  67 ++---
 .../intrinsic-test/src/common/gen_rust.rs     | 276 +++++++++++-------
 .../intrinsic-test/src/common/intrinsic.rs    |  35 +++
 .../src/common/intrinsic_helpers.rs           |  24 +-
 .../crates/intrinsic-test/src/common/mod.rs   |  45 +--
 .../crates/intrinsic-test/src/x86/config.rs   |  15 +-
 .../crates/intrinsic-test/src/x86/mod.rs      |  49 +++-
 .../crates/intrinsic-test/src/x86/types.rs    |  60 ++--
 12 files changed, 377 insertions(+), 241 deletions(-)

diff --git a/library/stdarch/crates/intrinsic-test/src/arm/config.rs b/library/stdarch/crates/intrinsic-test/src/arm/config.rs
index 9371db737ea22..87b8ebfa183e7 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/config.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/config.rs
@@ -16,7 +16,6 @@ pub const PLATFORM_RUST_CFGS: &str = r#"
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))]
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))]
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))]
-#![feature(fmt_helpers_for_derive)]
 #![feature(stdarch_neon_f16)]
 
 #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
index e365a5372f92e..9bf6c95ffdcb3 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs
@@ -13,16 +13,11 @@ use json_parser::get_neon_intrinsics;
 
 pub struct ArmArchitectureTest {
     intrinsics: Vec<Intrinsic<ArmIntrinsicType>>,
-    cli_options: ProcessedCli,
 }
 
 impl SupportedArchitectureTest for ArmArchitectureTest {
     type IntrinsicImpl = ArmIntrinsicType;
 
-    fn cli_options(&self) -> &ProcessedCli {
-        &self.cli_options
-    }
-
     fn intrinsics(&self) -> &[Intrinsic<ArmIntrinsicType>] {
         &self.intrinsics
     }
@@ -34,6 +29,10 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
     const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS;
     const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS;
 
+    fn arch_flags(&self) -> Vec<&str> {
+        vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"]
+    }
+
     fn create(cli_options: ProcessedCli) -> Self {
         let a32 = cli_options.target.starts_with("armv7");
         let mut intrinsics = get_neon_intrinsics(&cli_options.filename, &cli_options.target)
@@ -60,9 +59,6 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
             .take(sample_size)
             .collect::<Vec<_>>();
 
-        Self {
-            intrinsics,
-            cli_options,
-        }
+        Self { intrinsics }
     }
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/arm/types.rs b/library/stdarch/crates/intrinsic-test/src/arm/types.rs
index 8424379db59f2..e9614eba218cb 100644
--- a/library/stdarch/crates/intrinsic-test/src/arm/types.rs
+++ b/library/stdarch/crates/intrinsic-test/src/arm/types.rs
@@ -6,8 +6,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
     fn c_type(&self) -> String {
         let prefix = self.kind.c_prefix();
 
-        if let (Some(bit_len), simd_len, vec_len) = (self.bit_len, self.simd_len, self.vec_len) {
-            match (simd_len, vec_len) {
+        if let Some(bit_len) = self.bit_len {
+            match (self.simd_len, self.vec_len) {
                 (None, None) => format!("{prefix}{bit_len}_t"),
                 (Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"),
                 (Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"),
@@ -18,6 +18,22 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
         }
     }
 
+    fn rust_type(&self) -> String {
+        let rust_prefix = self.kind.rust_prefix();
+        let c_prefix = self.kind.c_prefix();
+
+        if let Some(bit_len) = self.bit_len {
+            match (self.simd_len, self.vec_len) {
+                (None, None) => format!("{rust_prefix}{bit_len}"),
+                (Some(simd), None) => format!("{c_prefix}{bit_len}x{simd}_t"),
+                (Some(simd), Some(vec)) => format!("{c_prefix}{bit_len}x{simd}x{vec}_t"),
+                (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case
+            }
+        } else {
+            todo!("{self:#?}")
+        }
+    }
+
     /// Determines the load function for this type.
     fn get_load_function(&self) -> String {
         if let IntrinsicType {
diff --git a/library/stdarch/crates/intrinsic-test/src/common/argument.rs b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
index 58fd08460f14c..8b28e46a6fad7 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/argument.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
@@ -94,6 +94,15 @@ where
             .to_string()
     }
 
+    pub fn as_non_imm_arglist_rust(&self) -> String {
+        self.iter()
+            .filter(|arg| !arg.has_constraint())
+            .format_with(", ", |arg, fmt| {
+                fmt(&format_args!("{}: {}", arg.name, arg.ty.rust_type()))
+            })
+            .to_string()
+    }
+
     pub fn as_call_params_c(&self, imm_args: &[i64]) -> String {
         let mut imm_args = imm_args.iter();
         self.iter()
@@ -112,8 +121,7 @@ where
     pub fn as_call_param_rust(&self) -> String {
         self.iter()
             .filter(|a| !a.has_constraint())
-            .map(|arg| arg.generate_name() + " as _")
-            .collect::<Vec<String>>()
+            .map(|arg| arg.generate_name())
             .join(", ")
     }
 
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
index 0b0df5830c482..1d3f34060fafd 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@@ -1,56 +1,9 @@
 use itertools::Itertools;
 
-use crate::common::constraint::Constraint;
 use crate::common::intrinsic::Intrinsic;
 
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
-fn generate_c_wrapper<'a, T: IntrinsicTypeDefinition + 'a>(
-    w: &mut impl std::io::Write,
-    intrinsic: &Intrinsic<T>,
-    constraints: &mut (impl Iterator<Item = &'a Constraint> + Clone),
-    imm_values: &mut Vec<i64>,
-) -> std::io::Result<()> {
-    if let Some(current) = constraints.next() {
-        for i in current.iter() {
-            imm_values.push(i);
-            generate_c_wrapper(w, intrinsic, &mut constraints.clone(), imm_values)?;
-            imm_values.pop();
-        }
-    } else {
-        writeln!(
-            w,
-            "
-{return_ty} {name}_wrapper{imm_arglist}({arglist}) {{
-    return {name}({params});
-}}",
-            return_ty = intrinsic.results.c_type(),
-            name = intrinsic.name,
-            imm_arglist = imm_values
-                .iter()
-                .format_with("", |i, fmt| fmt(&format_args!("_{i}"))),
-            arglist = intrinsic.arguments.as_non_imm_arglist_c(),
-            params = intrinsic.arguments.as_call_params_c(&imm_values)
-        )?;
-    }
-    Ok(())
-}
-
-fn create_c_wrapper<T: IntrinsicTypeDefinition>(
-    w: &mut impl std::io::Write,
-    intrinsic: &Intrinsic<T>,
-) -> std::io::Result<()> {
-    generate_c_wrapper(
-        w,
-        intrinsic,
-        &mut intrinsic
-            .arguments
-            .iter()
-            .filter_map(|arg| arg.constraint.as_ref()),
-        &mut Vec::new(),
-    )
-}
-
 pub fn write_wrapper_c<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
     notice: &str,
@@ -59,12 +12,30 @@ pub fn write_wrapper_c<T: IntrinsicTypeDefinition>(
 ) -> std::io::Result<()> {
     write!(w, "{notice}")?;
 
+    writeln!(w, "#include <stdint.h>")?;
+    writeln!(w, "#include <stddef.h>")?;
+
     for header in platform_headers {
         writeln!(w, "#include <{header}>")?;
     }
 
     for intrinsic in intrinsics {
-        create_c_wrapper(w, intrinsic)?;
+        intrinsic.iter_specializations(|imm_values| {
+            writeln!(
+                w,
+                "
+{return_ty} {name}_wrapper{imm_arglist}({arglist}) {{
+    return {name}({params});
+}}",
+                return_ty = intrinsic.results.c_type(),
+                name = intrinsic.name,
+                imm_arglist = imm_values
+                    .iter()
+                    .format_with("", |i, fmt| fmt(&format_args!("_{i}"))),
+                arglist = intrinsic.arguments.as_non_imm_arglist_c(),
+                params = intrinsic.arguments.as_call_params_c(&imm_values)
+            )
+        })?;
     }
 
     Ok(())
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
index daf239cbd8d34..2d7ba0966d5e6 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
@@ -4,18 +4,51 @@ use super::indentation::Indentation;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 use crate::common::argument::ArgumentList;
 use crate::common::intrinsic::Intrinsic;
+use crate::common::intrinsic_helpers::TypeKind;
 
 // The number of times each intrinsic will be called.
 pub(crate) const PASSES: u32 = 20;
 
+// we need a reflexive equality relation, so treat NaNs as equal
+const COMMON_RUST_DEFINITIONS: &str = r#"
+macro_rules! wrap_partialeq {
+    ($($wrapper:ident ($inner:ty)),*) => {$(
+        #[derive(Debug, Copy, Clone)]
+        #[repr(transparent)]
+        pub struct $wrapper($inner);
+
+        impl PartialEq for $wrapper {
+            fn eq(&self, other: &Self) -> bool {
+                self.0 == other.0 || (self.0.is_nan() && other.0.is_nan())
+            }
+        }
+
+        impl Eq for $wrapper {}
+    )*}
+}
+
+wrap_partialeq!(NanEqF16(f16), NanEqF32(f32), NanEqF64(f64));
+"#;
+
 macro_rules! concatln {
     ($($lines:expr),* $(,)?) => {
         concat!($( $lines, "\n" ),*)
     };
 }
 
-fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> {
-    writeln!(
+pub fn write_bin_cargo_toml(
+    w: &mut impl std::io::Write,
+    module_count: usize,
+) -> std::io::Result<()> {
+    write!(w, concatln!("[workspace]", "members = ["))?;
+    for i in 0..module_count {
+        writeln!(w, "    \"mod_{i}\",")?;
+    }
+    writeln!(w, "]")
+}
+
+pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> {
+    write!(
         w,
         concatln!(
             "[package]",
@@ -24,6 +57,12 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::
             "authors = [{authors}]",
             "license = \"{license}\"",
             "edition = \"2018\"",
+            "",
+            "[dependencies]",
+            "core_arch = {{ path = \"../../crates/core_arch\" }}",
+            "",
+            "[build-dependencies]",
+            "cc = \"1\""
         ),
         name = name,
         version = env!("CARGO_PKG_VERSION"),
@@ -34,36 +73,12 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::
     )
 }
 
-pub fn write_bin_cargo_toml(
-    w: &mut impl std::io::Write,
-    module_count: usize,
-) -> std::io::Result<()> {
-    write_cargo_toml_header(w, "intrinsic-test-programs")?;
-
-    writeln!(w, "[dependencies]")?;
-    writeln!(w, "core_arch = {{ path = \"../crates/core_arch\" }}")?;
-
-    for i in 0..module_count {
-        writeln!(w, "mod_{i} = {{ path = \"mod_{i}/\" }}")?;
-    }
-
-    Ok(())
-}
-
-pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> {
-    write_cargo_toml_header(w, name)?;
-
-    writeln!(w, "[dependencies]")?;
-    writeln!(w, "core_arch = {{ path = \"../../crates/core_arch\" }}")?;
-
-    Ok(())
-}
-
 pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
     notice: &str,
     cfg: &str,
     definitions: &str,
+    i: usize,
     intrinsics: &[Intrinsic<T>],
 ) -> std::io::Result<()> {
     write!(w, "{notice}")?;
@@ -79,6 +94,8 @@ pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
 
     writeln!(w, "{cfg}")?;
 
+    writeln!(w, "{}", COMMON_RUST_DEFINITIONS)?;
+
     writeln!(w, "{definitions}")?;
 
     let mut seen = std::collections::HashSet::new();
@@ -95,124 +112,185 @@ pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
         }
     }
 
+    write_bindings_rust(w, i, intrinsics)?;
+
     for intrinsic in intrinsics {
-        crate::common::gen_rust::create_rust_test_module(w, intrinsic)?;
+        create_rust_test(w, intrinsic)?;
     }
 
     Ok(())
 }
 
-pub fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
+fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
     intrinsic: &Intrinsic<T>,
-    indentation: Indentation,
-    specializations: &[Vec<i32>],
     passes: u32,
 ) -> std::io::Result<()> {
     let intrinsic_name = &intrinsic.name;
 
     // Each function (and each specialization) has its own type. Erase that type with a cast.
-    let mut coerce = String::from("unsafe fn(");
+    let mut coerce = String::from("fn(");
     for _ in intrinsic.arguments.iter().filter(|a| !a.has_constraint()) {
         coerce += "_, ";
     }
     coerce += ") -> _";
 
-    match specializations {
-        [] => {
-            writeln!(w, "    let specializations = [(\"\", {intrinsic_name})];")?;
-        }
-        [const_args] if const_args.is_empty() => {
-            writeln!(w, "    let specializations = [(\"\", {intrinsic_name})];")?;
-        }
-        _ => {
-            writeln!(w, "    let specializations = [")?;
-
-            for specialization in specializations {
-                let mut specialization: Vec<_> =
-                    specialization.iter().map(|d| d.to_string()).collect();
-
-                let const_args = specialization.join(",");
-
-                // The identifier is reversed.
-                specialization.reverse();
-                let id = specialization.join("-");
-
-                writeln!(
-                    w,
-                    "        (\"-{id}\", {intrinsic_name}::<{const_args}> as {coerce}),"
-                )?;
-            }
-
-            writeln!(w, "    ];")?;
-        }
+    if intrinsic
+        .arguments
+        .iter()
+        .filter(|arg| arg.has_constraint())
+        .count()
+        == 0
+    {
+        writeln!(
+            w,
+            "    let specializations = [(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)];"
+        )?;
+    } else {
+        writeln!(w, "    let specializations = [")?;
+
+        intrinsic.iter_specializations(|imm_values| {
+            writeln!(
+                w,
+                "        (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {coerce}),",
+                const_args = imm_values.iter().join(","),
+                c_const_args = imm_values.iter().join("_"),
+            )
+        })?;
+
+        writeln!(w, "    ];")?;
     }
 
+    let (cast_prefix, cast_suffix) = if intrinsic.results.is_simd() {
+        (
+            format!(
+                "std::mem::transmute::<_, [{}; {}]>(",
+                intrinsic.results.rust_scalar_type().replace("f", "NanEqF"),
+                intrinsic.results.num_lanes() * intrinsic.results.num_vectors()
+            ),
+            ")",
+        )
+    } else if intrinsic.results.kind == TypeKind::Float {
+        (
+            match intrinsic.results.inner_size() {
+                16 => format!("NanEqF16("),
+                32 => format!("NanEqF32("),
+                64 => format!("NanEqF64("),
+                _ => unimplemented!(),
+            },
+            ")",
+        )
+    } else {
+        ("".to_string(), "")
+    };
+
     write!(
         w,
         concatln!(
-            "    for (id, f) in specializations {{",
+            "    for (id, rust, c) in specializations {{",
             "        for i in 0..{passes} {{",
             "            unsafe {{",
             "{loaded_args}",
-            "                let __return_value = f({args});",
-            "                println!(\"Result {{id}}-{{}}: {{:?}}\", i + 1, {return_value});",
+            "                let __rust_return_value = rust({args});",
+            "                let __c_return_value = c({args});",
+            "                assert_eq!({cast_prefix}__rust_return_value{cast_suffix}, {cast_prefix}__c_return_value{cast_suffix}, \"{{id}}\");",
             "            }}",
             "        }}",
             "    }}",
         ),
-        loaded_args = intrinsic.arguments.load_values_rust(indentation.nest_by(4)),
+        loaded_args = intrinsic
+            .arguments
+            .load_values_rust(Indentation::default().nest_by(4)),
         args = intrinsic.arguments.as_call_param_rust(),
-        return_value = intrinsic.results.print_result_rust(),
         passes = passes,
+        cast_prefix = cast_prefix,
+        cast_suffix = cast_suffix,
     )
 }
 
-/// Generate the specializations (unique sequences of const-generic arguments) for this intrinsic.
-fn generate_rust_specializations(
-    constraints: &mut impl Iterator<Item = impl Iterator<Item = i64>>,
-) -> Vec<Vec<i32>> {
-    let mut specializations = vec![vec![]];
-
-    for constraint in constraints {
-        specializations = constraint
-            .flat_map(|right| {
-                specializations.iter().map(move |left| {
-                    let mut left = left.clone();
-                    left.push(i32::try_from(right).unwrap());
-                    left
-                })
-            })
-            .collect();
-    }
-
-    specializations
-}
-
-// Top-level function to create complete test program
-pub fn create_rust_test_module<T: IntrinsicTypeDefinition>(
+fn create_rust_test<T: IntrinsicTypeDefinition>(
     w: &mut impl std::io::Write,
     intrinsic: &Intrinsic<T>,
 ) -> std::io::Result<()> {
     trace!("generating `{}`", intrinsic.name);
-    let indentation = Indentation::default();
 
-    writeln!(w, "pub fn run_{}() {{", intrinsic.name)?;
+    write!(
+        w,
+        concatln!("#[test]", "fn test_{intrinsic_name}() {{"),
+        intrinsic_name = intrinsic.name,
+    )?;
 
     // Define the arrays of arguments.
     let arguments = &intrinsic.arguments;
-    arguments.gen_arglists_rust(w, indentation.nested(), PASSES)?;
-
-    // Define any const generics as `const` items, then generate the actual test loop.
-    let specializations = generate_rust_specializations(
-        &mut arguments
-            .iter()
-            .filter_map(|i| i.constraint.as_ref().map(|v| v.iter())),
-    );
+    arguments.gen_arglists_rust(w, Indentation::default().nested(), PASSES)?;
 
-    generate_rust_test_loop(w, intrinsic, indentation, &specializations, PASSES)?;
+    generate_rust_test_loop(w, intrinsic, PASSES)?;
 
     writeln!(w, "}}")?;
 
     Ok(())
 }
+
+pub fn write_bindings_rust<T: IntrinsicTypeDefinition>(
+    w: &mut impl std::io::Write,
+    i: usize,
+    intrinsics: &[Intrinsic<T>],
+) -> std::io::Result<()> {
+    write!(
+        w,
+        concatln!(
+            "#[allow(improper_ctypes)]",
+            "#[link(name = \"wrapper_{i}\")]",
+            "unsafe extern \"C\" {{"
+        ),
+        i = i
+    )?;
+
+    for intrinsic in intrinsics {
+        intrinsic.iter_specializations(|imm_values| {
+            writeln!(
+                w,
+                "    fn {name}_wrapper{imm_arglist}({arglist}) -> {return_ty};",
+                return_ty = intrinsic.results.rust_type(),
+                name = intrinsic.name,
+                imm_arglist = imm_values
+                    .iter()
+                    .format_with("", |i, fmt| fmt(&format_args!("_{i}"))),
+                arglist = intrinsic.arguments.as_non_imm_arglist_rust(),
+            )
+        })?;
+    }
+
+    writeln!(w, "}}")
+}
+
+pub fn write_build_rs(
+    w: &mut impl std::io::Write,
+    i: usize,
+    arch_flags: &[&str],
+) -> std::io::Result<()> {
+    const COMMON_FLAGS: &[&str] = &["-ffp-contract=off", "-ffp-model=strict", "-Wno-narrowing"];
+
+    write!(
+        w,
+        concatln!(
+            "fn main() {{",
+            "    cc::Build::new()",
+            "    .file(\"../../c_programs/wrapper_{i}.c\")",
+            "    .opt_level(2)",
+            "    .flags(&[",
+        ),
+        i = i
+    )?;
+
+    let indentation = Indentation::default().nest_by(2);
+    for flag in COMMON_FLAGS.iter().chain(arch_flags) {
+        writeln!(w, "{indentation}\"{flag}\",")?;
+    }
+
+    write!(
+        w,
+        concatln!("    ])", "    .compile(\"wrapper_{i}\");", "}}"),
+        i = i
+    )
+}
diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs
index 81f6d6d8b5b2c..76e5959153d07 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs
@@ -1,3 +1,5 @@
+use crate::common::constraint::Constraint;
+
 use super::argument::ArgumentList;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
@@ -16,3 +18,36 @@ pub struct Intrinsic<T: IntrinsicTypeDefinition> {
     /// Any architecture-specific tags.
     pub arch_tags: Vec<String>,
 }
+
+fn recurse_specializations<'a, E>(
+    constraints: &mut (impl Iterator<Item = &'a Constraint> + Clone),
+    imm_values: &mut Vec<i64>,
+    f: &mut impl FnMut(&[i64]) -> Result<(), E>,
+) -> Result<(), E> {
+    if let Some(current) = constraints.next() {
+        for i in current.iter() {
+            imm_values.push(i);
+            recurse_specializations(&mut constraints.clone(), imm_values, f)?;
+            imm_values.pop();
+        }
+        Ok(())
+    } else {
+        f(&imm_values)
+    }
+}
+
+impl<T: IntrinsicTypeDefinition> Intrinsic<T> {
+    pub fn iter_specializations<E>(
+        &self,
+        mut f: impl FnMut(&[i64]) -> Result<(), E>,
+    ) -> Result<(), E> {
+        recurse_specializations(
+            &mut self
+                .arguments
+                .iter()
+                .filter_map(|arg| arg.constraint.as_ref()),
+            &mut Vec::new(),
+            &mut f,
+        )
+    }
+}
diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
index f216b55c2f752..63e30a8b23e2d 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
@@ -93,6 +93,7 @@ impl TypeKind {
             Self::Poly => "u",
             Self::Char(Sign::Unsigned) => "u",
             Self::Char(Sign::Signed) => "i",
+            Self::Mask => "u",
             _ => unreachable!("Unused type kind: {self:#?}"),
         }
     }
@@ -256,19 +257,20 @@ pub trait IntrinsicTypeDefinition: Deref<Target = IntrinsicType> {
     /// can be directly defined in `impl` blocks
     fn c_type(&self) -> String;
 
-    /// Generates a std::cout for the intrinsics results that will match the
-    /// rust debug output format for the return type. The generated line assumes
-    /// there is an int i in scope which is the current pass number.
-    fn print_result_rust(&self) -> String {
-        String::from("format_args!(\"{__return_value:.150?}\")")
-    }
+    /// Gets a string containing the typename for this type in Rust format.
+    /// can be directly defined in `impl` blocks
+    fn rust_type(&self) -> String;
 
     /// To enable architecture-specific logic
     fn rust_scalar_type(&self) -> String {
-        format!(
-            "{prefix}{bits}",
-            prefix = self.kind().rust_prefix(),
-            bits = self.inner_size()
-        )
+        if self.is_simd() {
+            format!(
+                "{prefix}{bits}",
+                prefix = self.kind().rust_prefix(),
+                bits = self.inner_size()
+            )
+        } else {
+            self.rust_type()
+        }
     }
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/common/mod.rs b/library/stdarch/crates/intrinsic-test/src/common/mod.rs
index d98592d3b41d0..86849f7db34e0 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/mod.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/mod.rs
@@ -6,7 +6,7 @@ use cli::ProcessedCli;
 
 use crate::common::{
     gen_c::write_wrapper_c,
-    gen_rust::{write_bin_cargo_toml, write_lib_cargo_toml, write_lib_rs},
+    gen_rust::{write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs},
     intrinsic::Intrinsic,
     intrinsic_helpers::IntrinsicTypeDefinition,
 };
@@ -14,19 +14,19 @@ use crate::common::{
 pub mod argument;
 pub mod cli;
 pub mod constraint;
-pub mod gen_c;
-pub mod gen_rust;
-pub mod indentation;
 pub mod intrinsic;
 pub mod intrinsic_helpers;
-pub mod values;
+
+mod gen_c;
+mod gen_rust;
+mod indentation;
+mod values;
 
 /// Architectures must support this trait
 /// to be successfully tested.
 pub trait SupportedArchitectureTest {
     type IntrinsicImpl: IntrinsicTypeDefinition + Sync;
 
-    fn cli_options(&self) -> &ProcessedCli;
     fn intrinsics(&self) -> &[Intrinsic<Self::IntrinsicImpl>];
 
     fn create(cli_options: ProcessedCli) -> Self;
@@ -38,15 +38,17 @@ pub trait SupportedArchitectureTest {
     const PLATFORM_RUST_CFGS: &str;
     const PLATFORM_RUST_DEFINITIONS: &str;
 
+    fn arch_flags(&self) -> Vec<&str>;
+
     fn generate_c_file(&self) {
-        let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400);
+        let (chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len());
 
         std::fs::create_dir_all("c_programs").unwrap();
         self.intrinsics()
             .par_chunks(chunk_size)
             .enumerate()
             .map(|(i, chunk)| {
-                let c_filename = format!("c_programs/wrapper_{i}.cpp");
+                let c_filename = format!("c_programs/wrapper_{i}.c");
                 let mut file = File::create(&c_filename).unwrap();
                 write_wrapper_c(&mut file, Self::NOTICE, Self::PLATFORM_C_HEADERS, chunk)
             })
@@ -55,15 +57,17 @@ pub trait SupportedArchitectureTest {
     }
 
     fn generate_rust_file(&self) {
-        std::fs::create_dir_all("rust_programs/src").unwrap();
+        let arch_flags = self.arch_flags();
 
-        let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400);
+        std::fs::create_dir_all("rust_programs").unwrap();
+
+        let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len());
 
         let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
         write_bin_cargo_toml(&mut cargo, chunk_count).unwrap();
 
         self.intrinsics()
-            .par_chunks(chunk_size)
+            .chunks(chunk_size)
             .enumerate()
             .map(|(i, chunk)| {
                 std::fs::create_dir_all(format!("rust_programs/mod_{i}/src"))?;
@@ -77,6 +81,7 @@ pub trait SupportedArchitectureTest {
                     Self::NOTICE,
                     Self::PLATFORM_RUST_CFGS,
                     Self::PLATFORM_RUST_DEFINITIONS,
+                    i,
                     chunk,
                 )?;
 
@@ -86,6 +91,12 @@ pub trait SupportedArchitectureTest {
 
                 write_lib_cargo_toml(&mut file, &format!("mod_{i}"))?;
 
+                let build_rs_filename = format!("rust_programs/mod_{i}/build.rs");
+                trace!("generating `{build_rs_filename}`");
+                let mut file = File::create(build_rs_filename).unwrap();
+
+                write_build_rs(&mut file, i, &arch_flags).unwrap();
+
                 Ok(())
             })
             .collect::<Result<(), std::io::Error>>()
@@ -93,13 +104,7 @@ pub trait SupportedArchitectureTest {
     }
 }
 
-// pub fn chunk_info(intrinsic_count: usize) -> (usize, usize) {
-//     let available_parallelism = std::thread::available_parallelism().unwrap().get();
-//     let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count));
-
-//     (chunk_size, intrinsic_count.div_ceil(chunk_size))
-// }
-
-pub fn manual_chunk(intrinsic_count: usize, chunk_size: usize) -> (usize, usize) {
-    (chunk_size, intrinsic_count.div_ceil(chunk_size))
+pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) {
+    let ncores = std::thread::available_parallelism().unwrap().into();
+    (intrinsic_count.div_ceil(ncores), ncores)
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/x86/config.rs b/library/stdarch/crates/intrinsic-test/src/x86/config.rs
index e1a006fd9fcde..68737ab5ac4c8 100644
--- a/library/stdarch/crates/intrinsic-test/src/x86/config.rs
+++ b/library/stdarch/crates/intrinsic-test/src/x86/config.rs
@@ -3,7 +3,6 @@ pub const NOTICE: &str = "\
 // test are derived from an XML specification, published under the same license as the
 // `intrinsic-test` crate.\n";
 
-// Format f16 values (and vectors containing them) in a way that is consistent with C.
 pub const PLATFORM_RUST_DEFINITIONS: &str = r#"
 use core_arch::arch::x86_64::*;
 
@@ -132,14 +131,8 @@ unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 {
 "#;
 
 pub const PLATFORM_RUST_CFGS: &str = r#"
-#![cfg_attr(target_arch = "x86", feature(avx))]
-#![cfg_attr(target_arch = "x86", feature(sse))]
-#![cfg_attr(target_arch = "x86", feature(sse2))]
-#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_bf16))]
-#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))]
-#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
-#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
-#![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))]
-#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))]
-#![feature(fmt_helpers_for_derive)]
+#![feature(stdarch_x86_avx512_bf16)]
+#![feature(stdarch_x86_avx512_f16)]
+#![feature(stdarch_x86_rtm)]
+#![feature(x86_amx_intrinsics)]
 "#;
diff --git a/library/stdarch/crates/intrinsic-test/src/x86/mod.rs b/library/stdarch/crates/intrinsic-test/src/x86/mod.rs
index 0e2998f1a3960..5d4798482a1d3 100644
--- a/library/stdarch/crates/intrinsic-test/src/x86/mod.rs
+++ b/library/stdarch/crates/intrinsic-test/src/x86/mod.rs
@@ -13,27 +13,59 @@ use xml_parser::get_xml_intrinsics;
 
 pub struct X86ArchitectureTest {
     intrinsics: Vec<Intrinsic<X86IntrinsicType>>,
-    cli_options: ProcessedCli,
 }
 
 impl SupportedArchitectureTest for X86ArchitectureTest {
     type IntrinsicImpl = X86IntrinsicType;
 
-    fn cli_options(&self) -> &ProcessedCli {
-        &self.cli_options
-    }
-
     fn intrinsics(&self) -> &[Intrinsic<X86IntrinsicType>] {
         &self.intrinsics
     }
 
     const NOTICE: &str = config::NOTICE;
 
-    const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h", "cstddef", "cstdint"];
+    const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"];
 
     const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS;
     const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS;
 
+    fn arch_flags(&self) -> Vec<&str> {
+        vec![
+            "-mavx",
+            "-mavx2",
+            "-mavx512f",
+            "-msse2",
+            "-mavx512vl",
+            "-mavx512bw",
+            "-mavx512dq",
+            "-mavx512cd",
+            "-mavx512fp16",
+            "-msha",
+            "-msha512",
+            "-msm3",
+            "-msm4",
+            "-mavxvnni",
+            "-mavxvnniint8",
+            "-mavxneconvert",
+            "-mavxifma",
+            "-mavxvnniint16",
+            "-mavx512bf16",
+            "-mavx512bitalg",
+            "-mavx512ifma",
+            "-mavx512vbmi",
+            "-mavx512vbmi2",
+            "-mavx512vnni",
+            "-mavx512vpopcntdq",
+            "-mavx512vp2intersect",
+            "-mbmi",
+            "-mbmi2",
+            "-mgfni",
+            "-mvaes",
+            "-mvpclmulqdq",
+            "-mlzcnt",
+        ]
+    }
+
     fn create(cli_options: ProcessedCli) -> Self {
         let mut intrinsics =
             get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file");
@@ -59,9 +91,6 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
             .take(sample_size)
             .collect::<Vec<_>>();
 
-        Self {
-            intrinsics: intrinsics,
-            cli_options: cli_options,
-        }
+        Self { intrinsics }
     }
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/x86/types.rs b/library/stdarch/crates/intrinsic-test/src/x86/types.rs
index 83581c0dac6ab..c6ea15e150752 100644
--- a/library/stdarch/crates/intrinsic-test/src/x86/types.rs
+++ b/library/stdarch/crates/intrinsic-test/src/x86/types.rs
@@ -23,6 +23,30 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
             .replace("const ", "")
     }
 
+    fn rust_type(&self) -> String {
+        let type_data = &*self.param.type_data;
+        if type_data.starts_with("__m") {
+            return type_data.to_owned();
+        }
+        match &*type_data.replace("const ", "") {
+            "_Float16" => "f16",
+            "__bfloat16" => "bf16",
+            "float" => "f32",
+            "double" => "f64",
+            "__int8" | "char" => "i8",
+            "unsigned char" => "u8",
+            "__int16" | "short" => "i16",
+            "unsigned short" => "u16",
+            "__int32" | "int" => "i32",
+            "unsigned __int32" | "unsigned int" | "unsigned long" => "u32",
+            "__int64" | "long long" => "i64",
+            "unsigned __int64" => "u64",
+            "size_t" => "usize",
+            _ => todo!("unknown type {type_data}"),
+        }
+        .to_string()
+    }
+
     /// Determines the load function for this type.
     fn get_load_function(&self) -> String {
         let type_value = self.param.type_data.clone();
@@ -92,35 +116,15 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
     }
 
     fn rust_scalar_type(&self) -> String {
-        let prefix = match self.data.kind {
-            TypeKind::Mask => String::from("__mmask"),
-            TypeKind::Vector => String::from("i"),
-            _ => self.kind().rust_prefix().to_string(),
-        };
-
-        let bits = if self.inner_size() >= 128 {
-            32
+        if self.is_simd() {
+            format!(
+                "{prefix}{bits}",
+                prefix = self.kind().rust_prefix(),
+                bits = self.inner_size()
+            )
         } else {
-            self.inner_size()
-        };
-        format!("{prefix}{bits}")
-    }
-
-    fn print_result_rust(&self) -> String {
-        let return_value = match self.kind() {
-            // `_mm{256}_cvtps_ph` has return type __m128i but contains f16 values
-            TypeKind::Float if self.param.type_data == "__m128i" => {
-                "format_args!(\"{:.150?}\", debug_as::<_, f16>(__return_value))".to_string()
-            }
-            TypeKind::Int(_)
-                if ["__m128i", "__m256i", "__m512i"].contains(&self.param.type_data.as_str()) =>
-            {
-                format!("debug_as::<_, u{}>(__return_value)", self.inner_size())
-            }
-            _ => "format_args!(\"{__return_value:.150?}\")".to_string(),
-        };
-
-        return_value
+            self.rust_type().replace("__mmask", "u")
+        }
     }
 }
 

From 0962db67bcc855d02d45de549364e2f3b0ab322c Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 25 Apr 2026 02:10:19 +0530
Subject: [PATCH 57/78] Use pointers for the C definitions to resolve ABI
 inconsistencies

---
 .../intrinsic-test/src/common/argument.rs     | 43 +++++++++++++++++--
 .../crates/intrinsic-test/src/common/gen_c.rs |  4 +-
 .../intrinsic-test/src/common/gen_rust.rs     | 18 +++++---
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/library/stdarch/crates/intrinsic-test/src/common/argument.rs b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
index 8b28e46a6fad7..73ae70a08c059 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/argument.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
@@ -1,5 +1,7 @@
 use itertools::Itertools;
 
+use crate::common::intrinsic_helpers::TypeKind;
+
 use super::constraint::Constraint;
 use super::gen_rust::PASSES;
 use super::indentation::Indentation;
@@ -74,6 +76,11 @@ where
             format!("{}_vals", self.name.to_lowercase())
         }
     }
+
+    pub(crate) fn pass_by_ref(&self) -> bool {
+        // pass SIMD types and `f16` by reference
+        self.is_simd() || (self.ty.kind() == TypeKind::Float && self.ty.inner_size() == 16)
+    }
 }
 
 #[derive(Debug, PartialEq, Clone)]
@@ -88,8 +95,12 @@ where
     pub fn as_non_imm_arglist_c(&self) -> String {
         self.iter()
             .filter(|arg| !arg.has_constraint())
-            .format_with(", ", |arg, fmt| {
-                fmt(&format_args!("{} {}", arg.to_c_type(), arg.name))
+            .format_with("", |arg, fmt| {
+                if arg.pass_by_ref() {
+                    fmt(&format_args!(", const {}* {}", arg.to_c_type(), arg.name))
+                } else {
+                    fmt(&format_args!(", {} {}", arg.to_c_type(), arg.name))
+                }
             })
             .to_string()
     }
@@ -97,8 +108,16 @@ where
     pub fn as_non_imm_arglist_rust(&self) -> String {
         self.iter()
             .filter(|arg| !arg.has_constraint())
-            .format_with(", ", |arg, fmt| {
-                fmt(&format_args!("{}: {}", arg.name, arg.ty.rust_type()))
+            .format_with("", |arg, fmt| {
+                if arg.pass_by_ref() {
+                    fmt(&format_args!(
+                        ", {}: *const {}",
+                        arg.name,
+                        arg.ty.rust_type()
+                    ))
+                } else {
+                    fmt(&format_args!(", {}: {}", arg.name, arg.ty.rust_type()))
+                }
             })
             .to_string()
     }
@@ -110,6 +129,9 @@ where
                 if arg.has_constraint() {
                     fmt(&imm_args.next().unwrap())
                 } else {
+                    if arg.pass_by_ref() {
+                        fmt(&"*")?;
+                    }
                     fmt(&arg.name)
                 }
             })
@@ -125,6 +147,19 @@ where
             .join(", ")
     }
 
+    pub fn as_c_call_param_rust(&self) -> String {
+        self.iter()
+            .filter(|a| !a.has_constraint())
+            .map(|arg| {
+                if arg.pass_by_ref() {
+                    format!(", &raw const {}", arg.generate_name())
+                } else {
+                    format!(", {}", arg.generate_name())
+                }
+            })
+            .join("")
+    }
+
     /// Creates a line for each argument that initializes an array for Rust from which `loads` argument
     /// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20]  = [...];`
     pub fn gen_arglists_rust(
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
index 1d3f34060fafd..bdf6f68d58cc2 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs
@@ -24,8 +24,8 @@ pub fn write_wrapper_c<T: IntrinsicTypeDefinition>(
             writeln!(
                 w,
                 "
-{return_ty} {name}_wrapper{imm_arglist}({arglist}) {{
-    return {name}({params});
+void {name}_wrapper{imm_arglist}({return_ty}* __dst{arglist}) {{
+    *__dst = {name}({params});
 }}",
                 return_ty = intrinsic.results.c_type(),
                 name = intrinsic.name,
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
index 2d7ba0966d5e6..4ac9b7371ba13 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
@@ -130,10 +130,13 @@ fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
 
     // Each function (and each specialization) has its own type. Erase that type with a cast.
     let mut coerce = String::from("fn(");
+    let mut c_coerce = String::from("fn(_, ");
     for _ in intrinsic.arguments.iter().filter(|a| !a.has_constraint()) {
         coerce += "_, ";
+        c_coerce += "_, ";
     }
     coerce += ") -> _";
+    c_coerce += ")";
 
     if intrinsic
         .arguments
@@ -152,7 +155,7 @@ fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
         intrinsic.iter_specializations(|imm_values| {
             writeln!(
                 w,
-                "        (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {coerce}),",
+                "        (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {c_coerce}),",
                 const_args = imm_values.iter().join(","),
                 c_const_args = imm_values.iter().join("_"),
             )
@@ -191,8 +194,12 @@ fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
             "        for i in 0..{passes} {{",
             "            unsafe {{",
             "{loaded_args}",
-            "                let __rust_return_value = rust({args});",
-            "                let __c_return_value = c({args});",
+            "                let __rust_return_value = rust({rust_args});",
+            "",
+            "                let mut __c_return_value = std::mem::MaybeUninit::uninit();",
+            "                c(__c_return_value.as_mut_ptr(){c_args});",
+            "                let __c_return_value = __c_return_value.assume_init();",
+            "",
             "                assert_eq!({cast_prefix}__rust_return_value{cast_suffix}, {cast_prefix}__c_return_value{cast_suffix}, \"{{id}}\");",
             "            }}",
             "        }}",
@@ -201,7 +208,8 @@ fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
         loaded_args = intrinsic
             .arguments
             .load_values_rust(Indentation::default().nest_by(4)),
-        args = intrinsic.arguments.as_call_param_rust(),
+        rust_args = intrinsic.arguments.as_call_param_rust(),
+        c_args = intrinsic.arguments.as_c_call_param_rust(),
         passes = passes,
         cast_prefix = cast_prefix,
         cast_suffix = cast_suffix,
@@ -250,7 +258,7 @@ pub fn write_bindings_rust<T: IntrinsicTypeDefinition>(
         intrinsic.iter_specializations(|imm_values| {
             writeln!(
                 w,
-                "    fn {name}_wrapper{imm_arglist}({arglist}) -> {return_ty};",
+                "    fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});",
                 return_ty = intrinsic.results.rust_type(),
                 name = intrinsic.name,
                 imm_arglist = imm_values

From a5179d0f8a47f8c009ad93031c01d21b5b0cc910 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Thu, 7 May 2026 08:05:52 +0530
Subject: [PATCH 58/78] Make floats static, as rounding is not a real concern
 here anymore

---
 .../intrinsic-test/src/common/argument.rs     | 49 +++----------------
 .../intrinsic-test/src/common/gen_rust.rs     |  6 +--
 .../src/common/intrinsic_helpers.rs           | 15 ------
 3 files changed, 8 insertions(+), 62 deletions(-)

diff --git a/library/stdarch/crates/intrinsic-test/src/common/argument.rs b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
index 73ae70a08c059..885d5e998ef54 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/argument.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/argument.rs
@@ -53,28 +53,14 @@ where
         self.constraint.is_some()
     }
 
-    /// The binding keyword (e.g. "const" or "let") for the array of possible test inputs.
-    fn rust_vals_array_binding(&self) -> impl std::fmt::Display {
-        if self.ty.is_rust_vals_array_const() {
-            "const"
-        } else {
-            "let"
-        }
-    }
-
     /// The name (e.g. "A_VALS" or "a_vals") for the array of possible test inputs.
     pub(crate) fn rust_vals_array_name(&self) -> impl std::fmt::Display {
-        if self.ty.is_rust_vals_array_const() {
-            let loads = crate::common::gen_rust::PASSES;
-            format!(
-                "{}_{ty}_{load_size}",
-                self.name.to_uppercase(),
-                ty = self.ty.rust_scalar_type(),
-                load_size = self.ty.num_lanes() * self.ty.num_vectors() + loads - 1,
-            )
-        } else {
-            format!("{}_vals", self.name.to_lowercase())
-        }
+        let loads = crate::common::gen_rust::PASSES;
+        format!(
+            "{ty}_{load_size}",
+            ty = self.ty.rust_scalar_type().to_uppercase(),
+            load_size = self.ty.num_lanes() * self.ty.num_vectors() + loads - 1,
+        )
     }
 
     pub(crate) fn pass_by_ref(&self) -> bool {
@@ -160,26 +146,6 @@ where
             .join("")
     }
 
-    /// Creates a line for each argument that initializes an array for Rust from which `loads` argument
-    /// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20]  = [...];`
-    pub fn gen_arglists_rust(
-        &self,
-        w: &mut impl std::io::Write,
-        indentation: Indentation,
-        loads: u32,
-    ) -> std::io::Result<()> {
-        for arg in self.iter().filter(|&arg| !arg.has_constraint()) {
-            // Constants are defined globally.
-            if arg.ty.is_rust_vals_array_const() {
-                continue;
-            }
-
-            Self::gen_arg_rust(arg, w, indentation, loads)?;
-        }
-
-        Ok(())
-    }
-
     pub fn gen_arg_rust(
         arg: &Argument<T>,
         w: &mut impl std::io::Write,
@@ -188,8 +154,7 @@ where
     ) -> std::io::Result<()> {
         writeln!(
             w,
-            "{indentation}{bind} {name}: [{ty}; {load_size}] = {values};\n",
-            bind = arg.rust_vals_array_binding(),
+            "{indentation}static {name}: [{ty}; {load_size}] = {values};\n",
             name = arg.rust_vals_array_name(),
             ty = arg.ty.rust_scalar_type(),
             load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1,
diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
index 4ac9b7371ba13..d23710451d478 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs
@@ -102,7 +102,7 @@ pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
 
     for intrinsic in intrinsics {
         for arg in &intrinsic.arguments.args {
-            if !arg.has_constraint() && arg.ty.is_rust_vals_array_const() {
+            if !arg.has_constraint() {
                 let name = arg.rust_vals_array_name().to_string();
 
                 if seen.insert(name) {
@@ -228,10 +228,6 @@ fn create_rust_test<T: IntrinsicTypeDefinition>(
         intrinsic_name = intrinsic.name,
     )?;
 
-    // Define the arrays of arguments.
-    let arguments = &intrinsic.arguments;
-    arguments.gen_arglists_rust(w, Indentation::default().nested(), PASSES)?;
-
     generate_rust_test_loop(w, intrinsic, PASSES)?;
 
     writeln!(w, "}}")?;
diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
index 63e30a8b23e2d..ab4a565200bc8 100644
--- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
+++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs
@@ -231,21 +231,6 @@ impl IntrinsicType {
             _ => unimplemented!("populate random: {self:#?}"),
         }
     }
-
-    pub fn is_rust_vals_array_const(&self) -> bool {
-        match self {
-            // Floats have to be loaded at runtime for stable NaN conversion.
-            IntrinsicType {
-                kind: TypeKind::Float,
-                ..
-            } => false,
-            IntrinsicType {
-                kind: TypeKind::Int(_) | TypeKind::Poly,
-                ..
-            } => true,
-            _ => true,
-        }
-    }
 }
 
 pub trait IntrinsicTypeDefinition: Deref<Target = IntrinsicType> {

From 10f4190ce537ce40f7b41299a271dc77f6104362 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Mon, 16 Mar 2026 05:31:54 +0530
Subject: [PATCH 59/78] Modify the CI scripts to work with the new design

---
 library/stdarch/.github/workflows/main.yml    |   1 -
 .../aarch64-unknown-linux-gnu/Dockerfile      |   2 -
 .../aarch64_be-unknown-linux-gnu/Dockerfile   |   1 -
 .../arm-unknown-linux-gnueabihf/Dockerfile    |   4 +-
 .../armv7-unknown-linux-gnueabihf/Dockerfile  |   2 -
 .../x86_64-unknown-linux-gnu/Dockerfile       |   6 +-
 library/stdarch/ci/intrinsic-test-docker.sh   |   2 +-
 library/stdarch/ci/intrinsic-test.sh          | 115 ++++--------------
 8 files changed, 27 insertions(+), 106 deletions(-)

diff --git a/library/stdarch/.github/workflows/main.yml b/library/stdarch/.github/workflows/main.yml
index b5dbefef4eea3..5c84e856d54bb 100644
--- a/library/stdarch/.github/workflows/main.yml
+++ b/library/stdarch/.github/workflows/main.yml
@@ -279,7 +279,6 @@ jobs:
           - aarch64-unknown-linux-gnu
           - aarch64_be-unknown-linux-gnu
           - armv7-unknown-linux-gnueabihf
-          - arm-unknown-linux-gnueabihf
           - x86_64-unknown-linux-gnu
         profile: [dev, release]
         include:
diff --git a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index 8435dd3dedd33..e2b3d95585efe 100644
--- a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -1,11 +1,9 @@
 FROM ubuntu:25.10
 RUN apt-get update && apt-get install -y --no-install-recommends \
   gcc \
-  g++ \
   ca-certificates \
   libc6-dev \
   gcc-aarch64-linux-gnu \
-  g++-aarch64-linux-gnu \
   libc6-dev-arm64-cross \
   qemu-user \
   make \
diff --git a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
index 0e8efc64bbff7..d7c12493ad9cf 100644
--- a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
@@ -2,7 +2,6 @@ FROM ubuntu:25.10
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
   gcc \
-  g++ \
   ca-certificates \
   libc6-dev \
   libc6-dev-arm64-cross \
diff --git a/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
index 6d4ff24828672..23e4d5a341152 100644
--- a/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
+++ b/library/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
@@ -7,7 +7,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   libc6-dev-armhf-cross \
   qemu-user \
   make \
-  file
+  file \
+  clang \
+  lld
 ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
     CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
     OBJDUMP=arm-linux-gnueabihf-objdump
diff --git a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
index c0a4ed3e706d1..02744917af6df 100644
--- a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
+++ b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@@ -1,11 +1,9 @@
 FROM ubuntu:24.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
   gcc \
-  g++ \
   ca-certificates \
   libc6-dev \
   gcc-arm-linux-gnueabihf \
-  g++-arm-linux-gnueabihf \
   libc6-dev-armhf-cross \
   qemu-user \
   make \
diff --git a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
index ca6192a38d951..17d1ac67e714f 100644
--- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@@ -6,11 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   make \
   ca-certificates \
   wget \
-  xz-utils \
-  clang \
-  libstdc++-14-dev \
-  build-essential \
-  lld
+  xz-utils
 
 RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz
 RUN mkdir intel-sde
diff --git a/library/stdarch/ci/intrinsic-test-docker.sh b/library/stdarch/ci/intrinsic-test-docker.sh
index beeff42c76212..948b53dc67bc9 100755
--- a/library/stdarch/ci/intrinsic-test-docker.sh
+++ b/library/stdarch/ci/intrinsic-test-docker.sh
@@ -48,7 +48,7 @@ run() {
       --workdir /checkout \
       --privileged \
       stdarch \
-      sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh ${1}"
+      sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh"
 }
 
 if [ -z "$1" ]; then
diff --git a/library/stdarch/ci/intrinsic-test.sh b/library/stdarch/ci/intrinsic-test.sh
index 89104e2672ad9..1f3a2caf50654 100755
--- a/library/stdarch/ci/intrinsic-test.sh
+++ b/library/stdarch/ci/intrinsic-test.sh
@@ -5,127 +5,56 @@ set -ex
 : "${TARGET?The TARGET environment variable must be set.}"
 
 export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir"
-export HOST_RUSTFLAGS="${RUSTFLAGS}"
 export PROFILE="${PROFILE:="release"}"
 
-case ${TARGET} in
-    # On 32-bit use a static relocation model which avoids some extra
-    # instructions when dealing with static data, notably allowing some
-    # instruction assertion checks to pass below the 20 instruction limit. If
-    # this is the default, dynamic, then too many instructions are generated
-    # when we assert the instruction for a function and it causes tests to fail.
-    i686-* | i586-*)
-        export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static"
-        ;;
-    # Some x86_64 targets enable by default more features beyond SSE2,
-    # which cause some instruction assertion checks to fail.
-    x86_64-*)
-        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=-sse3"
-        ;;
-    #Unoptimized build uses fast-isel which breaks with msa
-    mips-* | mipsel-*)
-	export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
-	;;
-    armv7-*eabihf | thumbv7-*eabihf)
-        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
-        ;;
-    # Some of our test dependencies use the deprecated `gcc` crates which
-    # doesn't detect RISC-V compilers automatically, so do it manually here.
-    riscv*)
-        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk,+zks,+zbb,+zbc"
-        ;;
-esac
-
 echo "RUSTFLAGS=${RUSTFLAGS}"
-echo "OBJDUMP=${OBJDUMP}"
 echo "PROFILE=${PROFILE}"
 
 INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml"
 
-# Test targets compiled with extra features.
+export CC="clang"
+
 case ${TARGET} in
-    # Setup aarch64 & armv7 specific variables, the runner, along with some
-    # tests to skip
-    aarch64-unknown-linux-gnu*)
-        TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
-        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
-        TEST_CXX_COMPILER="clang++"
-        TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
-        : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
+    aarch64_be*)
+        export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc/usr/include --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization"
+        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt
         ;;
 
-    aarch64_be-unknown-linux-gnu*)
-        TEST_CPPFLAGS="-fuse-ld=lld"
-        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt
-        TEST_CXX_COMPILER="clang++"
-        TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
-        : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
+    aarch64*)
+        export CFLAGS="-I/usr/aarch64-linux-gnu/include/"
+        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
         ;;
 
-    armv7-unknown-linux-gnueabihf*)
-        TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
+    armv7*)
+        export CFLAGS="-I/usr/arm-linux-gnueabihf/include/"
         TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
-        TEST_CXX_COMPILER="clang++"
-        TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
-        : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
         ;;
 
-    x86_64-unknown-linux-gnu*)
-        TEST_CPPFLAGS="-fuse-ld=lld -I/usr/include/x86_64-linux-gnu/"
-        TEST_CXX_COMPILER="clang++"
-        TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}"
+    x86_64*)
+        export CFLAGS="-I/usr/include/x86_64-linux-gnu/"
         TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt
-        : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=20}"
         ;;
     *)
         ;;
 
 esac
 
-# Arm specific
 case "${TARGET}" in
-    aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*)
-        CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
-            cargo run "${INTRINSIC_TEST}" --release  \
-            --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
-            --runner "${TEST_RUNNER}" \
-            --cppcompiler "${TEST_CXX_COMPILER}" \
-            --skip "${TEST_SKIP_INTRINSICS}" \
-            --target "${TARGET}" \
-            --profile "${PROFILE}" \
-            --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
-        ;;
-
-    aarch64_be-unknown-linux-gnu*)
-        CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
-            cargo run "${INTRINSIC_TEST}" --release  \
-            --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
-            --runner "${TEST_RUNNER}" \
-            --cppcompiler "${TEST_CXX_COMPILER}" \
-            --skip "${TEST_SKIP_INTRINSICS}" \
-            --target "${TARGET}" \
-            --profile "${PROFILE}" \
-            --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
-            --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" \
-            --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
-        ;;
-
     x86_64-unknown-linux-gnu*)
-        # `CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER` is not necessary for `intrinsic-test`
-        # because the binary needs to run directly on the host.
-        # Hence the use of `env -u`.
         env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \
-            CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \
-            RUST_LOG=warn RUST_BACKTRACE=1 \
             cargo run "${INTRINSIC_TEST}" --release \
             --bin intrinsic-test -- intrinsics_data/x86-intel.xml \
-            --runner "${TEST_RUNNER}" \
             --skip "${TEST_SKIP_INTRINSICS}" \
-            --cppcompiler "${TEST_CXX_COMPILER}" \
-            --target "${TARGET}" \
-            --profile "${PROFILE}" \
-            --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
+            --target "${TARGET}"
+
+        echo "${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}"
         ;;
-     *)
+    *)
+        cargo run "${INTRINSIC_TEST}" --release \
+            --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
+            --skip "${TEST_SKIP_INTRINSICS}" \
+            --target "${TARGET}"
         ;;
 esac
+
+cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}"

From f09bf0bf511e0a41adb39f78268a0e68c02c89e4 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sat, 9 May 2026 09:29:43 +0530
Subject: [PATCH 60/78] gen-arm: toggle `big_endian_inverse` where required

---
 .../core_arch/src/aarch64/neon/generated.rs   | 13218 +++-
 .../src/arm_shared/neon/generated.rs          | 57548 ++++++++--------
 .../spec/neon/aarch64.spec.yml                |    89 +-
 .../spec/neon/arm_shared.spec.yml             |    76 +-
 4 files changed, 39559 insertions(+), 31372 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 3241583cf07ca..0672f66c9e08b 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -1029,6 +1029,7 @@ pub fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
 #[doc = "Floating-point complex add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
@@ -1045,8 +1046,33 @@ pub fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { _vcadd_rot270_f16(a, b) }
 }
 #[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v4f16"
+        )]
+        fn _vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vcadd_rot270_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
@@ -1063,8 +1089,33 @@ pub fn vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { _vcaddq_rot270_f16(a, b) }
 }
 #[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub fn vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v8f16"
+        )]
+        fn _vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = _vcaddq_rot270_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcadd))]
@@ -1079,8 +1130,31 @@ pub fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { _vcadd_rot270_f32(a, b) }
 }
 #[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v2f32"
+        )]
+        fn _vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = _vcadd_rot270_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcadd))]
@@ -1095,8 +1169,31 @@ pub fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { _vcaddq_rot270_f32(a, b) }
 }
 #[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v4f32"
+        )]
+        fn _vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vcaddq_rot270_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcadd))]
@@ -1111,8 +1208,31 @@ pub fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { _vcaddq_rot270_f64(a, b) }
 }
 #[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v2f64"
+        )]
+        fn _vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = _vcaddq_rot270_f64(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
@@ -1129,8 +1249,33 @@ pub fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { _vcadd_rot90_f16(a, b) }
 }
 #[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v4f16"
+        )]
+        fn _vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vcadd_rot90_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
@@ -1147,8 +1292,33 @@ pub fn vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { _vcaddq_rot90_f16(a, b) }
 }
 #[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub fn vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v8f16"
+        )]
+        fn _vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = _vcaddq_rot90_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcadd))]
@@ -1163,8 +1333,31 @@ pub fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { _vcadd_rot90_f32(a, b) }
 }
 #[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v2f32"
+        )]
+        fn _vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = _vcadd_rot90_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcadd))]
@@ -1179,8 +1372,31 @@ pub fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { _vcaddq_rot90_f32(a, b) }
 }
 #[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v4f32"
+        )]
+        fn _vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vcaddq_rot90_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcadd))]
@@ -1194,6 +1410,28 @@ pub fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     }
     unsafe { _vcaddq_rot90_f64(a, b) }
 }
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v2f64"
+        )]
+        fn _vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = _vcaddq_rot90_f64(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
 #[doc = "Floating-point absolute compare greater than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f64)"]
 #[inline]
@@ -2905,6 +3143,7 @@ pub fn vcltzh_f16(a: f16) -> u16 {
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
@@ -2921,58 +3160,158 @@ pub fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t
     unsafe { _vcmla_f16(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
-pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
+pub fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcmla.rot0.v8f16"
+            link_name = "llvm.aarch64.neon.vcmla.rot0.v4f16"
         )]
-        fn _vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t;
+        fn _vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vcmla_f16(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vcmlaq_f16(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
+#[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
-pub fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32"
+            link_name = "llvm.aarch64.neon.vcmla.rot0.v8f16"
         )]
-        fn _vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+        fn _vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t;
     }
-    unsafe { _vcmla_f32(a, b, c) }
+    unsafe { _vcmlaq_f16(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,fcma")]
+#[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
-pub fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32"
+            link_name = "llvm.aarch64.neon.vcmla.rot0.v8f16"
         )]
-        fn _vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+        fn _vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t;
+    }
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = _vcmlaq_f16(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32"
+        )]
+        fn _vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vcmla_f32(a, b, c) }
+}
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32"
+        )]
+        fn _vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: float32x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: float32x2_t = _vcmla_f32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32"
+        )]
+        fn _vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
     }
     unsafe { _vcmlaq_f32(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32"
+        )]
+        fn _vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+    }
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vcmlaq_f32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcmla))]
@@ -2987,6 +3326,29 @@ pub fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
     unsafe { _vcmlaq_f64(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot0.v2f64"
+        )]
+        fn _vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: float64x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: float64x2_t = _vcmlaq_f64(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fcma")]
@@ -3141,6 +3503,7 @@ pub fn vcmlaq_laneq_f32<const LANE: i32>(
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
@@ -3157,8 +3520,34 @@ pub fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float
     unsafe { _vcmla_rot180_f16(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot180.v4f16"
+        )]
+        fn _vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vcmla_rot180_f16(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
@@ -3175,8 +3564,34 @@ pub fn vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> floa
     unsafe { _vcmlaq_rot180_f16(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot180.v8f16"
+        )]
+        fn _vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t;
+    }
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = _vcmlaq_rot180_f16(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcmla))]
@@ -3191,8 +3606,32 @@ pub fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float
     unsafe { _vcmla_rot180_f32(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot180.v2f32"
+        )]
+        fn _vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: float32x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: float32x2_t = _vcmla_rot180_f32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcmla))]
@@ -3207,8 +3646,32 @@ pub fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> floa
     unsafe { _vcmlaq_rot180_f32(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot180.v4f32"
+        )]
+        fn _vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+    }
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vcmlaq_rot180_f32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcmla))]
@@ -3223,6 +3686,29 @@ pub fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa
     unsafe { _vcmlaq_rot180_f64(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot180.v2f64"
+        )]
+        fn _vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: float64x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: float64x2_t = _vcmlaq_rot180_f64(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fcma")]
@@ -3377,6 +3863,7 @@ pub fn vcmlaq_rot180_laneq_f32<const LANE: i32>(
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
@@ -3393,62 +3880,162 @@ pub fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float
     unsafe { _vcmla_rot270_f16(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
-pub fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
+pub fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcmla.rot270.v8f16"
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v4f16"
         )]
-        fn _vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t;
+        fn _vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vcmla_rot270_f16(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vcmlaq_rot270_f16(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
+#[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
-pub fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+pub fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcmla.rot270.v2f32"
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v8f16"
         )]
-        fn _vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+        fn _vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t;
     }
-    unsafe { _vcmla_rot270_f32(a, b, c) }
+    unsafe { _vcmlaq_rot270_f16(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,fcma")]
+#[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
-pub fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+pub fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcmla.rot270.v4f32"
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v8f16"
         )]
-        fn _vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+        fn _vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t;
+    }
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = _vcmlaq_rot270_f16(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
-    unsafe { _vcmlaq_rot270_f32(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcmla))]
-pub fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+pub fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v2f32"
+        )]
+        fn _vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vcmla_rot270_f32(a, b, c) }
+}
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v2f32"
+        )]
+        fn _vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: float32x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: float32x2_t = _vcmla_rot270_f32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v4f32"
+        )]
+        fn _vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vcmlaq_rot270_f32(a, b, c) }
+}
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v4f32"
+        )]
+        fn _vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+    }
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vcmlaq_rot270_f32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
@@ -3459,6 +4046,29 @@ pub fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa
     unsafe { _vcmlaq_rot270_f64(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v2f64"
+        )]
+        fn _vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: float64x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: float64x2_t = _vcmlaq_rot270_f64(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fcma")]
@@ -3613,6 +4223,7 @@ pub fn vcmlaq_rot270_laneq_f32<const LANE: i32>(
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
@@ -3629,8 +4240,34 @@ pub fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float1
     unsafe { _vcmla_rot90_f16(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot90.v4f16"
+        )]
+        fn _vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: float16x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vcmla_rot90_f16(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
@@ -3647,8 +4284,34 @@ pub fn vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float
     unsafe { _vcmlaq_rot90_f16(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot90.v8f16"
+        )]
+        fn _vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t;
+    }
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: float16x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = _vcmlaq_rot90_f16(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcmla))]
@@ -3663,8 +4326,32 @@ pub fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float3
     unsafe { _vcmla_rot90_f32(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot90.v2f32"
+        )]
+        fn _vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: float32x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: float32x2_t = _vcmla_rot90_f32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcmla))]
@@ -3679,8 +4366,32 @@ pub fn vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float
     unsafe { _vcmlaq_rot90_f32(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot90.v4f32"
+        )]
+        fn _vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+    }
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: float32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vcmlaq_rot90_f32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fcma")]
 #[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg_attr(test, assert_instr(fcmla))]
@@ -3695,6 +4406,29 @@ pub fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float
     unsafe { _vcmlaq_rot90_f64(a, b, c) }
 }
 #[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot90.v2f64"
+        )]
+        fn _vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: float64x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: float64x2_t = _vcmlaq_rot90_f64(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fcma")]
@@ -3849,12 +4583,26 @@ pub fn vcmlaq_rot90_laneq_f32<const LANE: i32>(
 #[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(mov))]
 pub fn vcombine_f64(a: float64x1_t, b: float64x1_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 1]) }
 }
+#[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn vcombine_f64(a: float64x1_t, b: float64x1_t) -> float64x2_t {
+    unsafe {
+        let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f32)"]
 #[inline]
@@ -4356,6 +5104,7 @@ pub fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(
     all(test, target_endian = "little"),
@@ -4373,6 +5122,31 @@ pub fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
     unsafe { simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)) }
 }
 #[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 1, LANE2 = 0)
+)]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
+    a: poly64x2_t,
+    b: poly64x1_t,
+) -> poly64x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert!(LANE2 == 0);
+    unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: poly64x2_t = vcombine_p64(b, b);
+        let ret_val: poly64x2_t =
+            simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64));
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -4740,6 +5514,7 @@ pub fn vcopyq_laneq_p16<const LANE1: i32, const LANE2: i32>(
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(
     all(test, target_endian = "little"),
@@ -4756,17 +5531,42 @@ pub fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(
     unsafe { simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
+#[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcreate_f64(a: u64) -> float64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Floating-point convert"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64)"]
-#[inline]
+pub fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(
+    a: poly64x2_t,
+    b: poly64x2_t,
+) -> poly64x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert_uimm_bits!(LANE2, 1);
+    unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: poly64x2_t =
+            simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64));
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcreate_f64(a: u64) -> float64x1_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64)"]
+#[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -7351,6 +8151,7 @@ pub fn vduph_laneq_f16<const N: i32>(a: float16x8_t) -> f16 {
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(dup, N = 0))]
 #[rustc_legacy_const_generics(1)]
@@ -7360,8 +8161,24 @@ pub fn vdupq_lane_f64<const N: i32>(a: float64x1_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
 #[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vdupq_lane_f64<const N: i32>(a: float64x1_t) -> float64x2_t {
+    static_assert!(N == 0);
+    unsafe {
+        let ret_val: float64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(dup, N = 0))]
 #[rustc_legacy_const_generics(1)]
@@ -7371,8 +8188,24 @@ pub fn vdupq_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
 #[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vdupq_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x2_t {
+    static_assert!(N == 0);
+    unsafe {
+        let ret_val: poly64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(dup, N = 1))]
 #[rustc_legacy_const_generics(1)]
@@ -7382,8 +8215,25 @@ pub fn vdupq_laneq_f64<const N: i32>(a: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
 #[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vdupq_laneq_f64<const N: i32>(a: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(dup, N = 1))]
 #[rustc_legacy_const_generics(1)]
@@ -7393,6 +8243,22 @@ pub fn vdupq_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
 #[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vdupq_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -7655,6 +8521,7 @@ pub fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ext, N = 1))]
 #[rustc_legacy_const_generics(2)]
@@ -7664,8 +8531,26 @@ pub fn vextq_f64<const N: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ext, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vextq_f64<const N: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ext, N = 1))]
 #[rustc_legacy_const_generics(2)]
@@ -7674,6 +8559,23 @@ pub fn vextq_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     static_assert_uimm_bits!(N, 1);
     unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ext, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vextq_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: poly64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
 #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f64)"]
 #[inline]
@@ -8001,6 +8903,7 @@ pub fn vfmad_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_high_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
@@ -8017,8 +8920,34 @@ pub fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float3
     unsafe { _vfmlal_high_f16(r, a, b) }
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_high_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmlal2))]
+pub fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmlal2.v2f32.v4f16"
+        )]
+        fn _vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t;
+    }
+    unsafe {
+        let r: float32x2_t = simd_shuffle!(r, r, [1, 0]);
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = _vfmlal_high_f16(r, a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_high_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
@@ -8034,6 +8963,31 @@ pub fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float
     }
     unsafe { _vfmlalq_high_f16(r, a, b) }
 }
+#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_high_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmlal2))]
+pub fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmlal2.v4f32.v8f16"
+        )]
+        fn _vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t;
+    }
+    unsafe {
+        let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]);
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vfmlalq_high_f16(r, a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_lane_high_f16)"]
 #[inline]
@@ -8173,6 +9127,7 @@ pub fn vfmlalq_laneq_low_f16<const LANE: i32>(
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_low_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
@@ -8189,8 +9144,34 @@ pub fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32
     unsafe { _vfmlal_low_f16(r, a, b) }
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_low_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmlal))]
+pub fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmlal.v2f32.v4f16"
+        )]
+        fn _vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t;
+    }
+    unsafe {
+        let r: float32x2_t = simd_shuffle!(r, r, [1, 0]);
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = _vfmlal_low_f16(r, a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_low_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
@@ -8206,9 +9187,35 @@ pub fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float3
     }
     unsafe { _vfmlalq_low_f16(r, a, b) }
 }
+#[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_low_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmlal))]
+pub fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmlal.v4f32.v8f16"
+        )]
+        fn _vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t;
+    }
+    unsafe {
+        let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]);
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vfmlalq_low_f16(r, a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_high_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
@@ -8225,8 +9232,34 @@ pub fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float3
     unsafe { _vfmlsl_high_f16(r, a, b) }
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_high_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmlsl2))]
+pub fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmlsl2.v2f32.v4f16"
+        )]
+        fn _vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t;
+    }
+    unsafe {
+        let r: float32x2_t = simd_shuffle!(r, r, [1, 0]);
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = _vfmlsl_high_f16(r, a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_high_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
@@ -8242,6 +9275,31 @@ pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float
     }
     unsafe { _vfmlslq_high_f16(r, a, b) }
 }
+#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_high_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmlsl2))]
+pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmlsl2.v4f32.v8f16"
+        )]
+        fn _vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t;
+    }
+    unsafe {
+        let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]);
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vfmlslq_high_f16(r, a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_lane_high_f16)"]
 #[inline]
@@ -8381,6 +9439,7 @@ pub fn vfmlslq_laneq_low_f16<const LANE: i32>(
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_low_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
@@ -8397,8 +9456,34 @@ pub fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32
     unsafe { _vfmlsl_low_f16(r, a, b) }
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_low_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmlsl))]
+pub fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmlsl.v2f32.v4f16"
+        )]
+        fn _vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t;
+    }
+    unsafe {
+        let r: float32x2_t = simd_shuffle!(r, r, [1, 0]);
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = _vfmlsl_low_f16(r, a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_low_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
@@ -8414,6 +9499,31 @@ pub fn vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float3
     }
     unsafe { _vfmlslq_low_f16(r, a, b) }
 }
+#[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_low_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmlsl))]
+pub fn vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmlsl.v4f32.v8f16"
+        )]
+        fn _vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t;
+    }
+    unsafe {
+        let r: float32x4_t = simd_shuffle!(r, r, [3, 2, 1, 0]);
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vfmlslq_low_f16(r, a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64)"]
 #[inline]
@@ -8739,6 +9849,7 @@ pub fn vfmsd_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
 #[doc = "Duplicate vector element to vector or scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmov))]
@@ -8746,8 +9857,22 @@ pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
     unsafe { float64x1_t([simd_extract!(a, 1)]) }
 }
 #[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmov))]
+pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        float64x1_t([simd_extract!(a, 1)])
+    }
+}
+#[doc = "Duplicate vector element to vector or scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -8755,8 +9880,22 @@ pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
     unsafe { float64x1_t([simd_extract!(a, 0)]) }
 }
 #[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        float64x1_t([simd_extract!(a, 0)])
+    }
+}
+#[doc = "Duplicate vector element to vector or scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[rustc_legacy_const_generics(1)]
@@ -8765,6 +9904,21 @@ pub fn vgetq_lane_f64<const IMM5: i32>(a: float64x2_t) -> f64 {
     static_assert_uimm_bits!(IMM5, 1);
     unsafe { simd_extract!(a, IMM5 as u32) }
 }
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+pub fn vgetq_lane_f64<const IMM5: i32>(a: float64x2_t) -> f64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        simd_extract!(a, IMM5 as u32)
+    }
+}
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"]
 #[doc = "## Safety"]
@@ -13090,6 +14244,7 @@ pub fn vpaddd_u64(a: uint64x2_t) -> u64 {
 #[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
@@ -13102,8 +14257,27 @@ pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     }
 }
 #[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(faddp))]
+pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>());
+        let ret_val: float16x8_t = simd_add(even, odd);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(faddp))]
@@ -13115,8 +14289,26 @@ pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     }
 }
 #[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(faddp))]
+pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>());
+        let ret_val: float32x4_t = simd_add(even, odd);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(faddp))]
@@ -13127,9 +14319,27 @@ pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
         simd_add(even, odd)
     }
 }
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(faddp))]
+pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
+        let ret_val: float64x2_t = simd_add(even, odd);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
 #[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
@@ -13141,8 +14351,32 @@ pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     }
 }
 #[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>());
+        let ret_val: int8x16_t = simd_add(even, odd);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
@@ -13154,8 +14388,26 @@ pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     }
 }
 #[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>());
+        let ret_val: int16x8_t = simd_add(even, odd);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
@@ -13167,8 +14419,26 @@ pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     }
 }
 #[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>());
+        let ret_val: int32x4_t = simd_add(even, odd);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
@@ -13180,8 +14450,26 @@ pub fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     }
 }
 #[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
+        let ret_val: int64x2_t = simd_add(even, odd);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
@@ -13193,8 +14481,32 @@ pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     }
 }
 #[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>());
+        let ret_val: uint8x16_t = simd_add(even, odd);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
@@ -13206,8 +14518,26 @@ pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     }
 }
 #[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>());
+        let ret_val: uint16x8_t = simd_add(even, odd);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
@@ -13219,21 +14549,57 @@ pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     }
 }
 #[doc = "Add Pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
-pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe {
-        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
-        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
-        simd_add(even, odd)
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>());
+        let ret_val: uint32x4_t = simd_add(even, odd);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Floating-point add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"]
-#[inline]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe {
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
+        simd_add(even, odd)
+    }
+}
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
+        let ret_val: uint64x2_t = simd_add(even, odd);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"]
+#[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
@@ -13249,8 +14615,32 @@ pub fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { _vpmax_f16(a, b) }
 }
 #[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxp.v4f16"
+        )]
+        fn _vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vpmax_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
@@ -13266,8 +14656,32 @@ pub fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { _vpmaxq_f16(a, b) }
 }
 #[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxp.v8f16"
+        )]
+        fn _vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = _vpmaxq_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
@@ -13283,8 +14697,32 @@ pub fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { _vpmaxnm_f16(a, b) }
 }
 #[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmp.v4f16"
+        )]
+        fn _vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vpmaxnm_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
@@ -13299,9 +14737,33 @@ pub fn vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     }
     unsafe { _vpmaxnmq_f16(a, b) }
 }
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub fn vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmp.v8f16"
+        )]
+        fn _vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = _vpmaxnmq_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
 #[doc = "Floating-point Maximum Number Pairwise (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -13316,8 +14778,31 @@ pub fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { _vpmaxnm_f32(a, b) }
 }
 #[doc = "Floating-point Maximum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmp.v2f32"
+        )]
+        fn _vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = _vpmaxnm_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point Maximum Number Pairwise (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -13332,8 +14817,31 @@ pub fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { _vpmaxnmq_f32(a, b) }
 }
 #[doc = "Floating-point Maximum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmp.v4f32"
+        )]
+        fn _vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vpmaxnmq_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point Maximum Number Pairwise (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -13347,9 +14855,32 @@ pub fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     }
     unsafe { _vpmaxnmq_f64(a, b) }
 }
+#[doc = "Floating-point Maximum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmp.v2f64"
+        )]
+        fn _vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = _vpmaxnmq_f64(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
 #[doc = "Floating-point maximum number pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmqd_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -13364,8 +14895,29 @@ pub fn vpmaxnmqd_f64(a: float64x2_t) -> f64 {
     unsafe { _vpmaxnmqd_f64(a) }
 }
 #[doc = "Floating-point maximum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmqd_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpmaxnmqd_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64"
+        )]
+        fn _vpmaxnmqd_f64(a: float64x2_t) -> f64;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        _vpmaxnmqd_f64(a)
+    }
+}
+#[doc = "Floating-point maximum number pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnms_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -13379,9 +14931,30 @@ pub fn vpmaxnms_f32(a: float32x2_t) -> f32 {
     }
     unsafe { _vpmaxnms_f32(a) }
 }
+#[doc = "Floating-point maximum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnms_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpmaxnms_f32(a: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32"
+        )]
+        fn _vpmaxnms_f32(a: float32x2_t) -> f32;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        _vpmaxnms_f32(a)
+    }
+}
 #[doc = "Folding maximum of adjacent pairs"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxp))]
@@ -13396,45 +14969,120 @@ pub fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { _vpmaxq_f32(a, b) }
 }
 #[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxp))]
-pub fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+pub fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxp.v2f64"
+            link_name = "llvm.aarch64.neon.fmaxp.v4f32"
         )]
-        fn _vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+        fn _vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vpmaxq_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vpmaxq_f64(a, b) }
 }
 #[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smaxp))]
-pub fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smaxp.v16i8"
+            link_name = "llvm.aarch64.neon.fmaxp.v2f64"
         )]
-        fn _vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        fn _vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
-    unsafe { _vpmaxq_s8(a, b) }
+    unsafe { _vpmaxq_f64(a, b) }
 }
 #[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smaxp))]
-pub fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxp.v2f64"
+        )]
+        fn _vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = _vpmaxq_f64(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v16i8"
+        )]
+        fn _vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    unsafe { _vpmaxq_s8(a, b) }
+}
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v16i8"
+        )]
+        fn _vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = _vpmaxq_s8(a, b);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
             link_name = "llvm.aarch64.neon.smaxp.v8i16"
@@ -13444,8 +15092,31 @@ pub fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { _vpmaxq_s16(a, b) }
 }
 #[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v8i16"
+        )]
+        fn _vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = _vpmaxq_s16(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Folding maximum of adjacent pairs"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(smaxp))]
@@ -13460,8 +15131,31 @@ pub fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { _vpmaxq_s32(a, b) }
 }
 #[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v4i32"
+        )]
+        fn _vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = _vpmaxq_s32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Folding maximum of adjacent pairs"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(umaxp))]
@@ -13476,8 +15170,37 @@ pub fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe { _vpmaxq_u8(a, b) }
 }
 #[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v16i8"
+        )]
+        fn _vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+    }
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = _vpmaxq_u8(a, b);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Folding maximum of adjacent pairs"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(umaxp))]
@@ -13492,8 +15215,31 @@ pub fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { _vpmaxq_u16(a, b) }
 }
 #[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v8i16"
+        )]
+        fn _vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+    }
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = _vpmaxq_u16(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Folding maximum of adjacent pairs"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(umaxp))]
@@ -13507,6 +15253,28 @@ pub fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     }
     unsafe { _vpmaxq_u32(a, b) }
 }
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v4i32"
+        )]
+        fn _vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vpmaxq_u32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
 #[doc = "Floating-point maximum pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxqd_f64)"]
 #[inline]
@@ -13542,6 +15310,7 @@ pub fn vpmaxs_f32(a: float32x2_t) -> f32 {
 #[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
@@ -13557,8 +15326,32 @@ pub fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { _vpmin_f16(a, b) }
 }
 #[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminp.v4f16"
+        )]
+        fn _vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vpmin_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
@@ -13574,8 +15367,32 @@ pub fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { _vpminq_f16(a, b) }
 }
 #[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminp.v8f16"
+        )]
+        fn _vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = _vpminq_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
@@ -13591,8 +15408,32 @@ pub fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { _vpminnm_f16(a, b) }
 }
 #[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fminnmp))]
+pub fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmp.v4f16"
+        )]
+        fn _vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vpminnm_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
@@ -13607,25 +15448,72 @@ pub fn vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     }
     unsafe { _vpminnmq_f16(a, b) }
 }
-#[doc = "Floating-point Minimum Number Pairwise (vector)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+pub fn vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnmp.v2f32"
+            link_name = "llvm.aarch64.neon.fminnmp.v8f16"
         )]
-        fn _vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = _vpminnmq_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point Minimum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmp.v2f32"
+        )]
+        fn _vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
     unsafe { _vpminnm_f32(a, b) }
 }
 #[doc = "Floating-point Minimum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmp.v2f32"
+        )]
+        fn _vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = _vpminnm_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Floating-point Minimum Number Pairwise (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -13640,8 +15528,31 @@ pub fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { _vpminnmq_f32(a, b) }
 }
 #[doc = "Floating-point Minimum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmp.v4f32"
+        )]
+        fn _vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vpminnmq_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point Minimum Number Pairwise (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -13655,9 +15566,32 @@ pub fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     }
     unsafe { _vpminnmq_f64(a, b) }
 }
+#[doc = "Floating-point Minimum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmp.v2f64"
+        )]
+        fn _vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = _vpminnmq_f64(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
 #[doc = "Floating-point minimum number pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmqd_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -13672,8 +15606,29 @@ pub fn vpminnmqd_f64(a: float64x2_t) -> f64 {
     unsafe { _vpminnmqd_f64(a) }
 }
 #[doc = "Floating-point minimum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmqd_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpminnmqd_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64"
+        )]
+        fn _vpminnmqd_f64(a: float64x2_t) -> f64;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        _vpminnmqd_f64(a)
+    }
+}
+#[doc = "Floating-point minimum number pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnms_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -13687,9 +15642,30 @@ pub fn vpminnms_f32(a: float32x2_t) -> f32 {
     }
     unsafe { _vpminnms_f32(a) }
 }
+#[doc = "Floating-point minimum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnms_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vpminnms_f32(a: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32"
+        )]
+        fn _vpminnms_f32(a: float32x2_t) -> f32;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        _vpminnms_f32(a)
+    }
+}
 #[doc = "Folding minimum of adjacent pairs"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fminp))]
@@ -13704,8 +15680,31 @@ pub fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { _vpminq_f32(a, b) }
 }
 #[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminp.v4f32"
+        )]
+        fn _vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = _vpminq_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Folding minimum of adjacent pairs"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fminp))]
@@ -13720,8 +15719,31 @@ pub fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { _vpminq_f64(a, b) }
 }
 #[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminp.v2f64"
+        )]
+        fn _vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = _vpminq_f64(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Folding minimum of adjacent pairs"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sminp))]
@@ -13736,8 +15758,37 @@ pub fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe { _vpminq_s8(a, b) }
 }
 #[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v16i8"
+        )]
+        fn _vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = _vpminq_s8(a, b);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Folding minimum of adjacent pairs"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sminp))]
@@ -13752,88 +15803,230 @@ pub fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { _vpminq_s16(a, b) }
 }
 #[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sminp))]
-pub fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+pub fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sminp.v4i32"
+            link_name = "llvm.aarch64.neon.sminp.v8i16"
         )]
-        fn _vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        fn _vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = _vpminq_s16(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
-    unsafe { _vpminq_s32(a, b) }
 }
 #[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uminp))]
-pub fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uminp.v16i8"
+            link_name = "llvm.aarch64.neon.sminp.v4i32"
         )]
-        fn _vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+        fn _vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    unsafe { _vpminq_u8(a, b) }
+    unsafe { _vpminq_s32(a, b) }
 }
 #[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uminp))]
-pub fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uminp.v8i16"
+            link_name = "llvm.aarch64.neon.sminp.v4i32"
         )]
-        fn _vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+        fn _vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = _vpminq_s32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vpminq_u16(a, b) }
 }
 #[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(uminp))]
-pub fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+pub fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uminp.v4i32"
+            link_name = "llvm.aarch64.neon.uminp.v16i8"
         )]
-        fn _vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+        fn _vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
     }
-    unsafe { _vpminq_u32(a, b) }
+    unsafe { _vpminq_u8(a, b) }
 }
-#[doc = "Floating-point minimum pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fminp))]
-pub fn vpminqd_f64(a: float64x2_t) -> f64 {
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminv.f64.v2f64"
+            link_name = "llvm.aarch64.neon.uminp.v16i8"
+        )]
+        fn _vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+    }
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = _vpminq_u8(a, b);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v8i16"
+        )]
+        fn _vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+    }
+    unsafe { _vpminq_u16(a, b) }
+}
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v8i16"
+        )]
+        fn _vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+    }
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = _vpminq_u16(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v4i32"
+        )]
+        fn _vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vpminq_u32(a, b) }
+}
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v4i32"
+        )]
+        fn _vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vpminq_u32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Floating-point minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn vpminqd_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminv.f64.v2f64"
         )]
         fn _vpminqd_f64(a: float64x2_t) -> f64;
     }
     unsafe { _vpminqd_f64(a) }
 }
 #[doc = "Floating-point minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn vpminqd_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminv.f64.v2f64"
+        )]
+        fn _vpminqd_f64(a: float64x2_t) -> f64;
+    }
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        _vpminqd_f64(a)
+    }
+}
+#[doc = "Floating-point minimum pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmins_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fminp))]
@@ -13847,6 +16040,26 @@ pub fn vpmins_f32(a: float32x2_t) -> f32 {
     }
     unsafe { _vpmins_f32(a) }
 }
+#[doc = "Floating-point minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmins_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn vpmins_f32(a: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminv.f32.v2f32"
+        )]
+        fn _vpmins_f32(a: float32x2_t) -> f32;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        _vpmins_f32(a)
+    }
+}
 #[doc = "Signed saturating Absolute value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s64)"]
 #[inline]
@@ -16334,6 +18547,7 @@ fn vqtbl1q(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16341,8 +18555,25 @@ pub fn vqtbl1_s8(a: int8x16_t, b: uint8x8_t) -> int8x8_t {
     vqtbl1(a, b)
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl1_s8(a: int8x16_t, b: uint8x8_t) -> int8x8_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = vqtbl1(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16350,8 +18581,30 @@ pub fn vqtbl1q_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
     vqtbl1q(a, b)
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl1q_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = vqtbl1q(a, b);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16359,8 +18612,25 @@ pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vqtbl1(transmute(a), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(vqtbl1(transmute(a), b));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16368,8 +18638,30 @@ pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vqtbl1q(transmute(a), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(vqtbl1q(transmute(a), b));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16377,8 +18669,25 @@ pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vqtbl1(transmute(a), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(vqtbl1(transmute(a), b));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16386,6 +18695,27 @@ pub fn vqtbl1q_p8(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t {
     unsafe { transmute(vqtbl1q(transmute(a), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl1q_p8(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(vqtbl1q(transmute(a), b));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -16420,6 +18750,7 @@ fn vqtbl2q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16427,17 +18758,74 @@ pub fn vqtbl2_s8(a: int8x16x2_t, b: uint8x8_t) -> int8x8_t {
     vqtbl2(a.0, a.1, b)
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl2q_s8(a: int8x16x2_t, b: uint8x16_t) -> int8x16_t {
-    vqtbl2q(a.0, a.1, b)
+pub fn vqtbl2_s8(a: int8x16x2_t, b: uint8x8_t) -> int8x8_t {
+    let mut a: int8x16x2_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = vqtbl2(a.0, a.1, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_s8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl2q_s8(a: int8x16x2_t, b: uint8x16_t) -> int8x16_t {
+    vqtbl2q(a.0, a.1, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl2q_s8(a: int8x16x2_t, b: uint8x16_t) -> int8x16_t {
+    let mut a: int8x16x2_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = vqtbl2q(a.0, a.1, b);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16445,8 +18833,34 @@ pub fn vqtbl2_u8(a: uint8x16x2_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vqtbl2(transmute(a.0), transmute(a.1), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl2_u8(a: uint8x16x2_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x16x2_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(vqtbl2(transmute(a.0), transmute(a.1), b));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16454,8 +18868,39 @@ pub fn vqtbl2q_u8(a: uint8x16x2_t, b: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl2q_u8(a: uint8x16x2_t, b: uint8x16_t) -> uint8x16_t {
+    let mut a: uint8x16x2_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(vqtbl2q(transmute(a.0), transmute(a.1), b));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16463,8 +18908,34 @@ pub fn vqtbl2_p8(a: poly8x16x2_t, b: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vqtbl2(transmute(a.0), transmute(a.1), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl2_p8(a: poly8x16x2_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x16x2_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(vqtbl2(transmute(a.0), transmute(a.1), b));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16472,6 +18943,36 @@ pub fn vqtbl2q_p8(a: poly8x16x2_t, b: uint8x16_t) -> poly8x16_t {
     unsafe { transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl2q_p8(a: poly8x16x2_t, b: uint8x16_t) -> poly8x16_t {
+    let mut a: poly8x16x2_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(vqtbl2q(transmute(a.0), transmute(a.1), b));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -16506,6 +19007,7 @@ fn vqtbl3q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: uint8x16_t) -> int8x16_t
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16513,8 +19015,39 @@ pub fn vqtbl3_s8(a: int8x16x3_t, b: uint8x8_t) -> int8x8_t {
     vqtbl3(a.0, a.1, a.2, b)
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl3_s8(a: int8x16x3_t, b: uint8x8_t) -> int8x8_t {
+    let mut a: int8x16x3_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = vqtbl3(a.0, a.1, a.2, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16522,8 +19055,44 @@ pub fn vqtbl3q_s8(a: int8x16x3_t, b: uint8x16_t) -> int8x16_t {
     vqtbl3q(a.0, a.1, a.2, b)
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl3q_s8(a: int8x16x3_t, b: uint8x16_t) -> int8x16_t {
+    let mut a: int8x16x3_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = vqtbl3q(a.0, a.1, a.2, b);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16531,8 +19100,40 @@ pub fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x16x3_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t =
+            transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16540,8 +19141,45 @@ pub fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t {
+    let mut a: uint8x16x3_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t =
+            transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16549,8 +19187,40 @@ pub fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x16x3_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t =
+            transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16558,6 +19228,42 @@ pub fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t {
     unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t {
+    let mut a: poly8x16x3_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t =
+            transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -16604,6 +19310,7 @@ fn vqtbl4q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x16_t
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16611,22 +19318,99 @@ pub fn vqtbl4_s8(a: int8x16x4_t, b: uint8x8_t) -> int8x8_t {
     vqtbl4(a.0, a.1, a.2, a.3, b)
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl4q_s8(a: int8x16x4_t, b: uint8x16_t) -> int8x16_t {
-    vqtbl4q(a.0, a.1, a.2, a.3, b)
+pub fn vqtbl4_s8(a: int8x16x4_t, b: uint8x8_t) -> int8x8_t {
+    let mut a: int8x16x4_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.3 = simd_shuffle!(
+            a.3,
+            a.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = vqtbl4(a.0, a.1, a.2, a.3, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe {
+pub fn vqtbl4q_s8(a: int8x16x4_t, b: uint8x16_t) -> int8x16_t {
+    vqtbl4q(a.0, a.1, a.2, a.3, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl4q_s8(a: int8x16x4_t, b: uint8x16_t) -> int8x16_t {
+    let mut a: int8x16x4_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.3 = simd_shuffle!(
+            a.3,
+            a.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = vqtbl4q(a.0, a.1, a.2, a.3, b);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
         transmute(vqtbl4(
             transmute(a.0),
             transmute(a.1),
@@ -16637,8 +19421,50 @@ pub fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x16x4_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.3 = simd_shuffle!(
+            a.3,
+            a.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(vqtbl4(
+            transmute(a.0),
+            transmute(a.1),
+            transmute(a.2),
+            transmute(a.3),
+            b,
+        ));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16654,8 +19480,55 @@ pub fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t {
     }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t {
+    let mut a: uint8x16x4_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.3 = simd_shuffle!(
+            a.3,
+            a.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(vqtbl4q(
+            transmute(a.0),
+            transmute(a.1),
+            transmute(a.2),
+            transmute(a.3),
+            b,
+        ));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16671,8 +19544,50 @@ pub fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x16x4_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.3 = simd_shuffle!(
+            a.3,
+            a.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(vqtbl4(
+            transmute(a.0),
+            transmute(a.1),
+            transmute(a.2),
+            transmute(a.3),
+            b,
+        ));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16687,6 +19602,52 @@ pub fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t {
         ))
     }
 }
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t {
+    let mut a: poly8x16x4_t = a;
+    unsafe {
+        a.0 = simd_shuffle!(
+            a.0,
+            a.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.1 = simd_shuffle!(
+            a.1,
+            a.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.2 = simd_shuffle!(
+            a.2,
+            a.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        a.3 = simd_shuffle!(
+            a.3,
+            a.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(vqtbl4q(
+            transmute(a.0),
+            transmute(a.1),
+            transmute(a.2),
+            transmute(a.3),
+            b,
+        ));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1)"]
 #[inline]
@@ -16722,6 +19683,7 @@ fn vqtbx1q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16729,8 +19691,26 @@ pub fn vqtbx1_s8(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t {
     vqtbx1(a, b, c)
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx1_s8(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t {
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = vqtbx1(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16738,8 +19718,32 @@ pub fn vqtbx1q_s8(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t {
     vqtbx1q(a, b, c)
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx1q_s8(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = vqtbx1q(a, b, c);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16747,8 +19751,26 @@ pub fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vqtbx1(transmute(a), transmute(b), c)) }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(vqtbx1(transmute(a), transmute(b), c));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16756,8 +19778,32 @@ pub fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vqtbx1q(transmute(a), transmute(b), c)) }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(vqtbx1q(transmute(a), transmute(b), c));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16765,8 +19811,26 @@ pub fn vqtbx1_p8(a: poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vqtbx1(transmute(a), transmute(b), c)) }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx1_p8(a: poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t {
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(vqtbx1(transmute(a), transmute(b), c));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16774,6 +19838,29 @@ pub fn vqtbx1q_p8(a: poly8x16_t, b: poly8x16_t, c: uint8x16_t) -> poly8x16_t {
     unsafe { transmute(vqtbx1q(transmute(a), transmute(b), c)) }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx1q_p8(a: poly8x16_t, b: poly8x16_t, c: uint8x16_t) -> poly8x16_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(vqtbx1q(transmute(a), transmute(b), c));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -16808,6 +19895,7 @@ fn vqtbx2q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: uint8x16_t) -> int8x16_t
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16815,17 +19903,77 @@ pub fn vqtbx2_s8(a: int8x8_t, b: int8x16x2_t, c: uint8x8_t) -> int8x8_t {
     vqtbx2(a, b.0, b.1, c)
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx2q_s8(a: int8x16_t, b: int8x16x2_t, c: uint8x16_t) -> int8x16_t {
-    vqtbx2q(a, b.0, b.1, c)
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"]
-#[inline]
+pub fn vqtbx2_s8(a: int8x8_t, b: int8x16x2_t, c: uint8x8_t) -> int8x8_t {
+    let mut b: int8x16x2_t = b;
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = vqtbx2(a, b.0, b.1, c);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_s8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx2q_s8(a: int8x16_t, b: int8x16x2_t, c: uint8x16_t) -> int8x16_t {
+    vqtbx2q(a, b.0, b.1, c)
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx2q_s8(a: int8x16_t, b: int8x16x2_t, c: uint8x16_t) -> int8x16_t {
+    let mut b: int8x16x2_t = b;
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = vqtbx2q(a, b.0, b.1, c);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16833,8 +19981,35 @@ pub fn vqtbx2_u8(a: uint8x8_t, b: uint8x16x2_t, c: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)) }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx2_u8(a: uint8x8_t, b: uint8x16x2_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x16x2_t = b;
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16842,8 +20017,42 @@ pub fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)) }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t {
+    let mut b: uint8x16x2_t = b;
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t =
+            transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16851,8 +20060,35 @@ pub fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)) }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x16x2_t = b;
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16860,6 +20096,39 @@ pub fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t {
     unsafe { transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)) }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t {
+    let mut b: poly8x16x2_t = b;
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t =
+            transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -16901,6 +20170,7 @@ fn vqtbx3q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x16_t
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16908,8 +20178,40 @@ pub fn vqtbx3_s8(a: int8x8_t, b: int8x16x3_t, c: uint8x8_t) -> int8x8_t {
     vqtbx3(a, b.0, b.1, b.2, c)
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx3_s8(a: int8x8_t, b: int8x16x3_t, c: uint8x8_t) -> int8x8_t {
+    let mut b: int8x16x3_t = b;
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = vqtbx3(a, b.0, b.1, b.2, c);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16917,8 +20219,46 @@ pub fn vqtbx3q_s8(a: int8x16_t, b: int8x16x3_t, c: uint8x16_t) -> int8x16_t {
     vqtbx3q(a, b.0, b.1, b.2, c)
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx3q_s8(a: int8x16_t, b: int8x16x3_t, c: uint8x16_t) -> int8x16_t {
+    let mut b: int8x16x3_t = b;
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = vqtbx3q(a, b.0, b.1, b.2, c);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16934,8 +20274,46 @@ pub fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x16x3_t = b;
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(vqtbx3(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            c,
+        ));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16951,8 +20329,52 @@ pub fn vqtbx3q_u8(a: uint8x16_t, b: uint8x16x3_t, c: uint8x16_t) -> uint8x16_t {
     }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx3q_u8(a: uint8x16_t, b: uint8x16x3_t, c: uint8x16_t) -> uint8x16_t {
+    let mut b: uint8x16x3_t = b;
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(vqtbx3q(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            c,
+        ));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16968,8 +20390,46 @@ pub fn vqtbx3_p8(a: poly8x8_t, b: poly8x16x3_t, c: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx3_p8(a: poly8x8_t, b: poly8x16x3_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x16x3_t = b;
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(vqtbx3(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            c,
+        ));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -16985,7 +20445,50 @@ pub fn vqtbx3q_p8(a: poly8x16_t, b: poly8x16x3_t, c: uint8x16_t) -> poly8x16_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx3q_p8(a: poly8x16_t, b: poly8x16x3_t, c: uint8x16_t) -> poly8x16_t {
+    let mut b: poly8x16x3_t = b;
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(vqtbx3q(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            c,
+        ));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
@@ -17047,6 +20550,7 @@ fn vqtbx4q(
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -17054,8 +20558,45 @@ pub fn vqtbx4_s8(a: int8x8_t, b: int8x16x4_t, c: uint8x8_t) -> int8x8_t {
     vqtbx4(a, b.0, b.1, b.2, b.3, c)
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx4_s8(a: int8x8_t, b: int8x16x4_t, c: uint8x8_t) -> int8x8_t {
+    let mut b: int8x16x4_t = b;
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.3 = simd_shuffle!(
+            b.3,
+            b.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = vqtbx4(a, b.0, b.1, b.2, b.3, c);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -17063,8 +20604,51 @@ pub fn vqtbx4q_s8(a: int8x16_t, b: int8x16x4_t, c: uint8x16_t) -> int8x16_t {
     vqtbx4q(a, b.0, b.1, b.2, b.3, c)
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx4q_s8(a: int8x16_t, b: int8x16x4_t, c: uint8x16_t) -> int8x16_t {
+    let mut b: int8x16x4_t = b;
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.3 = simd_shuffle!(
+            b.3,
+            b.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = vqtbx4q(a, b.0, b.1, b.2, b.3, c);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -17081,8 +20665,52 @@ pub fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x16x4_t = b;
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.3 = simd_shuffle!(
+            b.3,
+            b.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(vqtbx4(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            transmute(b.3),
+            c,
+        ));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -17099,8 +20727,58 @@ pub fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t {
     }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t {
+    let mut b: uint8x16x4_t = b;
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.3 = simd_shuffle!(
+            b.3,
+            b.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(vqtbx4q(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            transmute(b.3),
+            c,
+        ));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -17117,8 +20795,52 @@ pub fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x16x4_t = b;
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.3 = simd_shuffle!(
+            b.3,
+            b.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(vqtbx4(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            transmute(b.3),
+            c,
+        ));
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -17134,6 +20856,55 @@ pub fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t {
         ))
     }
 }
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t {
+    let mut b: poly8x16x4_t = b;
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(
+            b.0,
+            b.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.1 = simd_shuffle!(
+            b.1,
+            b.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.2 = simd_shuffle!(
+            b.2,
+            b.2,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        b.3 = simd_shuffle!(
+            b.3,
+            b.3,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(vqtbx4q(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            transmute(b.3),
+            c,
+        ));
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
 #[doc = "Rotate and exclusive OR"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrax1q_u64)"]
 #[inline]
@@ -17418,7 +21189,6 @@ pub fn vrecpxh_f16(a: f16) -> f16 {
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
@@ -17427,125 +21197,64 @@ pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
+pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t {
+pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t {
+pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -17700,7 +21409,6 @@ pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
@@ -17708,2935 +21416,2595 @@ pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
+pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
+pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
+pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
+pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
+pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
+pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
+pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
+pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
+pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
+pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
+pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
+pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
+pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
+pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
+pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
+pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
+pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
+pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
+pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
+pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
+pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
+#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
+pub fn vrnd32x_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32x.v2f32"
+        )]
+        fn _vrnd32x_f32(a: float32x2_t) -> float32x2_t;
     }
+    unsafe { _vrnd32x_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
+#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
+pub fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32x.v4f32"
+        )]
+        fn _vrnd32xq_f32(a: float32x4_t) -> float32x4_t;
     }
+    unsafe { _vrnd32xq_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
+#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
+pub fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32x.v2f64"
+        )]
+        fn _vrnd32xq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    unsafe { _vrnd32xq_f64(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
+#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
-    unsafe {
-        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
+pub fn vrnd32x_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.frint32x.f64"
+        )]
+        fn _vrnd32x_f64(a: f64) -> f64;
     }
+    unsafe { transmute(_vrnd32x_f64(vget_lane_f64::<0>(a))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
+#[doc = "Floating-point round to 32-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
+pub fn vrnd32z_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32z.v2f32"
+        )]
+        fn _vrnd32z_f32(a: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vrnd32z_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
+#[doc = "Floating-point round to 32-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
+pub fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32z.v4f32"
+        )]
+        fn _vrnd32zq_f32(a: float32x4_t) -> float32x4_t;
     }
+    unsafe { _vrnd32zq_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
+#[doc = "Floating-point round to 32-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
+pub fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32z.v2f64"
+        )]
+        fn _vrnd32zq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    unsafe { _vrnd32zq_f64(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
+#[doc = "Floating-point round to 32-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
+pub fn vrnd32z_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.frint32z.f64"
+        )]
+        fn _vrnd32z_f64(a: f64) -> f64;
     }
+    unsafe { transmute(_vrnd32z_f64(vget_lane_f64::<0>(a))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
+#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
+pub fn vrnd64x_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint64x.v2f32"
+        )]
+        fn _vrnd64x_f32(a: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vrnd64x_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
+#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
+pub fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint64x.v4f32"
+        )]
+        fn _vrnd64xq_f32(a: float32x4_t) -> float32x4_t;
     }
+    unsafe { _vrnd64xq_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
+#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
+pub fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint64x.v2f64"
+        )]
+        fn _vrnd64xq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    unsafe { _vrnd64xq_f64(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
+#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
+pub fn vrnd64x_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.frint64x.f64"
+        )]
+        fn _vrnd64x_f64(a: f64) -> f64;
     }
+    unsafe { transmute(_vrnd64x_f64(vget_lane_f64::<0>(a))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
+#[doc = "Floating-point round to 64-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
+pub fn vrnd64z_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint64z.v2f32"
+        )]
+        fn _vrnd64z_f32(a: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vrnd64z_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
+#[doc = "Floating-point round to 64-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
+pub fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint64z.v4f32"
+        )]
+        fn _vrnd64zq_f32(a: float32x4_t) -> float32x4_t;
     }
+    unsafe { _vrnd64zq_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
+#[doc = "Floating-point round to 64-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
+pub fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint64z.v2f64"
+        )]
+        fn _vrnd64zq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    unsafe { _vrnd64zq_f64(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
+#[doc = "Floating-point round to 64-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
+pub fn vrnd64z_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.frint64z.f64"
+        )]
+        fn _vrnd64z_f64(a: f64) -> f64;
     }
+    unsafe { transmute(_vrnd64z_f64(vget_lane_f64::<0>(a))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
+#[doc = "Floating-point round to integral, toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn vrnd_f16(a: float16x4_t) -> float16x4_t {
+    unsafe { simd_trunc(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
+#[doc = "Floating-point round to integral, toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn vrndq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe { simd_trunc(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
+#[doc = "Floating-point round to integral, toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
-    unsafe { transmute(a) }
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn vrnd_f32(a: float32x2_t) -> float32x2_t {
+    unsafe { simd_trunc(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
+#[doc = "Floating-point round to integral, toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn vrndq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe { simd_trunc(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
+#[doc = "Floating-point round to integral, toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
-    unsafe { transmute(a) }
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn vrnd_f64(a: float64x1_t) -> float64x1_t {
+    unsafe { simd_trunc(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
+#[doc = "Floating-point round to integral, toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn vrndq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe { simd_trunc(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn vrnda_f16(a: float16x4_t) -> float16x4_t {
+    unsafe { simd_round(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn vrndaq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe { simd_round(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
-    unsafe { transmute(a) }
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn vrnda_f32(a: float32x2_t) -> float32x2_t {
+    unsafe { simd_round(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn vrndaq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe { simd_round(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
-    unsafe { transmute(a) }
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn vrnda_f64(a: float64x1_t) -> float64x1_t {
+    unsafe { simd_round(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe { simd_round(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndah_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn vrndah_f16(a: f16) -> f16 {
+    roundf16(a)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndh_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn vrndh_f16(a: f16) -> f16 {
+    truncf16(a)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
-    unsafe { transmute(a) }
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn vrndi_f16(a: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.nearbyint.v4f16"
+        )]
+        fn _vrndi_f16(a: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vrndi_f16(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
-    unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn vrndiq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.nearbyint.v8f16"
+        )]
+        fn _vrndiq_f16(a: float16x8_t) -> float16x8_t;
     }
+    unsafe { _vrndiq_f16(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
-    unsafe { transmute(a) }
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn vrndi_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.nearbyint.v2f32"
+        )]
+        fn _vrndi_f32(a: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vrndi_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
-    unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn vrndiq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.nearbyint.v4f32"
+        )]
+        fn _vrndiq_f32(a: float32x4_t) -> float32x4_t;
     }
+    unsafe { _vrndiq_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
-#[inline]
-#[cfg(target_endian = "big")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn vrndi_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.nearbyint.v1f64"
+        )]
+        fn _vrndi_f64(a: float64x1_t) -> float64x1_t;
+    }
+    unsafe { _vrndi_f64(a) }
+}
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"]
+#[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn vrndiq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.nearbyint.v2f64"
+        )]
+        fn _vrndiq_f64(a: float64x2_t) -> float64x2_t;
     }
+    unsafe { _vrndiq_f64(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndih_f16)"]
+#[inline]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn vrndih_f16(a: f16) -> f16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.nearbyint.f16"
+        )]
+        fn _vrndih_f16(a: f16) -> f16;
+    }
+    unsafe { _vrndih_f16(a) }
+}
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f16)"]
+#[inline]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn vrndm_f16(a: float16x4_t) -> float16x4_t {
+    unsafe { simd_floor(a) }
+}
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f16)"]
+#[inline]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn vrndmq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe { simd_floor(a) }
+}
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
-    unsafe { transmute(a) }
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn vrndm_f32(a: float32x2_t) -> float32x2_t {
+    unsafe { simd_floor(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn vrndmq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe { simd_floor(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
-    unsafe { transmute(a) }
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn vrndm_f64(a: float64x1_t) -> float64x1_t {
+    unsafe { simd_floor(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn vrndmq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe { simd_floor(a) }
 }
-#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"]
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmh_f16)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
-pub fn vrnd32x_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32x.v2f32"
-        )]
-        fn _vrnd32x_f32(a: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vrnd32x_f32(a) }
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn vrndmh_f16(a: f16) -> f16 {
+    floorf16(a)
 }
-#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f32)"]
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
-pub fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub fn vrndn_f64(a: float64x1_t) -> float64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32x.v4f32"
+            link_name = "llvm.roundeven.v1f64"
         )]
-        fn _vrnd32xq_f32(a: float32x4_t) -> float32x4_t;
+        fn _vrndn_f64(a: float64x1_t) -> float64x1_t;
     }
-    unsafe { _vrnd32xq_f32(a) }
+    unsafe { _vrndn_f64(a) }
 }
-#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)"]
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f64)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
-pub fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32x.v2f64"
+            link_name = "llvm.roundeven.v2f64"
         )]
-        fn _vrnd32xq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vrndnq_f64(a: float64x2_t) -> float64x2_t;
     }
-    unsafe { _vrnd32xq_f64(a) }
+    unsafe { _vrndnq_f64(a) }
 }
-#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64)"]
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnh_f16)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
-pub fn vrnd32x_f64(a: float64x1_t) -> float64x1_t {
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frintn))]
+pub fn vrndnh_f16(a: f16) -> f16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.frint32x.f64"
+            link_name = "llvm.roundeven.f16"
         )]
-        fn _vrnd32x_f64(a: f64) -> f64;
+        fn _vrndnh_f16(a: f16) -> f16;
     }
-    unsafe { transmute(_vrnd32x_f64(vget_lane_f64::<0>(a))) }
+    unsafe { _vrndnh_f16(a) }
 }
-#[doc = "Floating-point round to 32-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"]
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
-pub fn vrnd32z_f32(a: float32x2_t) -> float32x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub fn vrndns_f32(a: f32) -> f32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32z.v2f32"
+            link_name = "llvm.roundeven.f32"
         )]
-        fn _vrnd32z_f32(a: float32x2_t) -> float32x2_t;
+        fn _vrndns_f32(a: f32) -> f32;
     }
-    unsafe { _vrnd32z_f32(a) }
+    unsafe { _vrndns_f32(a) }
 }
-#[doc = "Floating-point round to 32-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f32)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f16)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
-pub fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32z.v4f32"
-        )]
-        fn _vrnd32zq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vrnd32zq_f32(a) }
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn vrndp_f16(a: float16x4_t) -> float16x4_t {
+    unsafe { simd_ceil(a) }
 }
-#[doc = "Floating-point round to 32-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f16)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
-pub fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32z.v2f64"
-        )]
-        fn _vrnd32zq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    unsafe { _vrnd32zq_f64(a) }
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn vrndpq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe { simd_ceil(a) }
 }
-#[doc = "Floating-point round to 32-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64)"]
-#[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
-pub fn vrnd32z_f64(a: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.frint32z.f64"
-        )]
-        fn _vrnd32z_f64(a: f64) -> f64;
-    }
-    unsafe { transmute(_vrnd32z_f64(vget_lane_f64::<0>(a))) }
-}
-#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"]
-#[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
-pub fn vrnd64x_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64x.v2f32"
-        )]
-        fn _vrnd64x_f32(a: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vrnd64x_f32(a) }
-}
-#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f32)"]
-#[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
-pub fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64x.v4f32"
-        )]
-        fn _vrnd64xq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vrnd64xq_f32(a) }
-}
-#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)"]
-#[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
-pub fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64x.v2f64"
-        )]
-        fn _vrnd64xq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    unsafe { _vrnd64xq_f64(a) }
-}
-#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
-pub fn vrnd64x_f64(a: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.frint64x.f64"
-        )]
-        fn _vrnd64x_f64(a: f64) -> f64;
-    }
-    unsafe { transmute(_vrnd64x_f64(vget_lane_f64::<0>(a))) }
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn vrndp_f32(a: float32x2_t) -> float32x2_t {
+    unsafe { simd_ceil(a) }
 }
-#[doc = "Floating-point round to 64-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f32)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
-pub fn vrnd64z_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64z.v2f32"
-        )]
-        fn _vrnd64z_f32(a: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vrnd64z_f32(a) }
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn vrndpq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe { simd_ceil(a) }
 }
-#[doc = "Floating-point round to 64-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f32)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f64)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
-pub fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64z.v4f32"
-        )]
-        fn _vrnd64zq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vrnd64zq_f32(a) }
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn vrndp_f64(a: float64x1_t) -> float64x1_t {
+    unsafe { simd_ceil(a) }
 }
-#[doc = "Floating-point round to 64-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f64)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
-pub fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64z.v2f64"
-        )]
-        fn _vrnd64zq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    unsafe { _vrnd64zq_f64(a) }
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn vrndpq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe { simd_ceil(a) }
 }
-#[doc = "Floating-point round to 64-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndph_f16)"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
-pub fn vrnd64z_f64(a: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.frint64z.f64"
-        )]
-        fn _vrnd64z_f64(a: f64) -> f64;
-    }
-    unsafe { transmute(_vrnd64z_f64(vget_lane_f64::<0>(a))) }
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn vrndph_f16(a: f16) -> f16 {
+    ceilf16(a)
 }
-#[doc = "Floating-point round to integral, toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f16)"]
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintz))]
-pub fn vrnd_f16(a: float16x4_t) -> float16x4_t {
-    unsafe { simd_trunc(a) }
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
+    unsafe { simd_round_ties_even(a) }
 }
-#[doc = "Floating-point round to integral, toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f16)"]
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintz))]
-pub fn vrndq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe { simd_trunc(a) }
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe { simd_round_ties_even(a) }
 }
-#[doc = "Floating-point round to integral, toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"]
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintz))]
-pub fn vrnd_f32(a: float32x2_t) -> float32x2_t {
-    unsafe { simd_trunc(a) }
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn vrndx_f32(a: float32x2_t) -> float32x2_t {
+    unsafe { simd_round_ties_even(a) }
 }
-#[doc = "Floating-point round to integral, toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f32)"]
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintz))]
-pub fn vrndq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe { simd_trunc(a) }
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe { simd_round_ties_even(a) }
 }
-#[doc = "Floating-point round to integral, toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f64)"]
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintz))]
-pub fn vrnd_f64(a: float64x1_t) -> float64x1_t {
-    unsafe { simd_trunc(a) }
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn vrndx_f64(a: float64x1_t) -> float64x1_t {
+    unsafe { simd_round_ties_even(a) }
 }
-#[doc = "Floating-point round to integral, toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f64)"]
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintz))]
-pub fn vrndq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe { simd_trunc(a) }
-}
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frinta))]
-pub fn vrnda_f16(a: float16x4_t) -> float16x4_t {
-    unsafe { simd_round(a) }
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe { simd_round_ties_even(a) }
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f16)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frinta))]
-pub fn vrndaq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe { simd_round(a) }
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn vrndxh_f16(a: f16) -> f16 {
+    round_ties_even_f16(a)
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"]
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(srshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinta))]
-pub fn vrnda_f32(a: float32x2_t) -> float32x2_t {
-    unsafe { simd_round(a) }
+pub fn vrshld_s64(a: i64, b: i64) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.i64"
+        )]
+        fn _vrshld_s64(a: i64, b: i64) -> i64;
+    }
+    unsafe { _vrshld_s64(a, b) }
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f32)"]
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(urshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinta))]
-pub fn vrndaq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe { simd_round(a) }
+pub fn vrshld_u64(a: u64, b: i64) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urshl.i64"
+        )]
+        fn _vrshld_u64(a: u64, b: i64) -> u64;
+    }
+    unsafe { _vrshld_u64(a, b) }
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f64)"]
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinta))]
-pub fn vrnda_f64(a: float64x1_t) -> float64x1_t {
-    unsafe { simd_round(a) }
+pub fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert!(N >= 1 && N <= 64);
+    vrshld_s64(a, -N as i64)
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f64)"]
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinta))]
-pub fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe { simd_round(a) }
+pub fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert!(N >= 1 && N <= 64);
+    vrshld_u64(a, -N as i64)
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndah_f16)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frinta))]
-pub fn vrndah_f16(a: f16) -> f16 {
-    roundf16(a)
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    vcombine_s8(a, vrshrn_n_s16::<N>(b))
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndh_f16)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintz))]
-pub fn vrndh_f16(a: f16) -> f16 {
-    truncf16(a)
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    vcombine_s16(a, vrshrn_n_s32::<N>(b))
 }
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f16)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frinti))]
-pub fn vrndi_f16(a: float16x4_t) -> float16x4_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    vcombine_s32(a, vrshrn_n_s64::<N>(b))
+}
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    vcombine_u8(a, vrshrn_n_u16::<N>(b))
+}
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    vcombine_u16(a, vrshrn_n_u32::<N>(b))
+}
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    vcombine_u32(a, vrshrn_n_u64::<N>(b))
+}
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrsqrte_f64(a: float64x1_t) -> float64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.v4f16"
+            link_name = "llvm.aarch64.neon.frsqrte.v1f64"
         )]
-        fn _vrndi_f16(a: float16x4_t) -> float16x4_t;
+        fn _vrsqrte_f64(a: float64x1_t) -> float64x1_t;
     }
-    unsafe { _vrndi_f16(a) }
+    unsafe { _vrsqrte_f64(a) }
 }
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f16)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f64)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frinti))]
-pub fn vrndiq_f16(a: float16x8_t) -> float16x8_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.v8f16"
+            link_name = "llvm.aarch64.neon.frsqrte.v2f64"
         )]
-        fn _vrndiq_f16(a: float16x8_t) -> float16x8_t;
+        fn _vrsqrteq_f64(a: float64x2_t) -> float64x2_t;
     }
-    unsafe { _vrndiq_f16(a) }
+    unsafe { _vrsqrteq_f64(a) }
 }
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrted_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinti))]
-pub fn vrndi_f32(a: float32x2_t) -> float32x2_t {
+pub fn vrsqrted_f64(a: f64) -> f64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.v2f32"
+            link_name = "llvm.aarch64.neon.frsqrte.f64"
         )]
-        fn _vrndi_f32(a: float32x2_t) -> float32x2_t;
+        fn _vrsqrted_f64(a: f64) -> f64;
     }
-    unsafe { _vrndi_f32(a) }
+    unsafe { _vrsqrted_f64(a) }
 }
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f32)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtes_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinti))]
-pub fn vrndiq_f32(a: float32x4_t) -> float32x4_t {
+pub fn vrsqrtes_f32(a: f32) -> f32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.v4f32"
+            link_name = "llvm.aarch64.neon.frsqrte.f32"
         )]
-        fn _vrndiq_f32(a: float32x4_t) -> float32x4_t;
+        fn _vrsqrtes_f32(a: f32) -> f32;
     }
-    unsafe { _vrndiq_f32(a) }
+    unsafe { _vrsqrtes_f32(a) }
 }
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f64)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteh_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinti))]
-pub fn vrndi_f64(a: float64x1_t) -> float64x1_t {
+#[cfg_attr(test, assert_instr(frsqrte))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrsqrteh_f16(a: f16) -> f16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.v1f64"
+            link_name = "llvm.aarch64.neon.frsqrte.f16"
         )]
-        fn _vrndi_f64(a: float64x1_t) -> float64x1_t;
+        fn _vrsqrteh_f16(a: f16) -> f16;
     }
-    unsafe { _vrndi_f64(a) }
+    unsafe { _vrsqrteh_f16(a) }
 }
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinti))]
-pub fn vrndiq_f64(a: float64x2_t) -> float64x2_t {
+pub fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.v2f64"
+            link_name = "llvm.aarch64.neon.frsqrts.v1f64"
         )]
-        fn _vrndiq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
     }
-    unsafe { _vrndiq_f64(a) }
+    unsafe { _vrsqrts_f64(a, b) }
 }
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndih_f16)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f64)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frinti))]
-pub fn vrndih_f16(a: f16) -> f16 {
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.f16"
+            link_name = "llvm.aarch64.neon.frsqrts.v2f64"
         )]
-        fn _vrndih_f16(a: f16) -> f16;
+        fn _vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
-    unsafe { _vrndih_f16(a) }
+    unsafe { _vrsqrtsq_f64(a, b) }
 }
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintm))]
-pub fn vrndm_f16(a: float16x4_t) -> float16x4_t {
-    unsafe { simd_floor(a) }
-}
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintm))]
-pub fn vrndmq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe { simd_floor(a) }
-}
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintm))]
-pub fn vrndm_f32(a: float32x2_t) -> float32x2_t {
-    unsafe { simd_floor(a) }
-}
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintm))]
-pub fn vrndmq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe { simd_floor(a) }
-}
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintm))]
-pub fn vrndm_f64(a: float64x1_t) -> float64x1_t {
-    unsafe { simd_floor(a) }
-}
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintm))]
-pub fn vrndmq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe { simd_floor(a) }
-}
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmh_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintm))]
-pub fn vrndmh_f16(a: f16) -> f16 {
-    floorf16(a)
-}
-#[doc = "Floating-point round to integral, to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsd_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintn))]
-pub fn vrndn_f64(a: float64x1_t) -> float64x1_t {
+pub fn vrsqrtsd_f64(a: f64, b: f64) -> f64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.roundeven.v1f64"
+            link_name = "llvm.aarch64.neon.frsqrts.f64"
         )]
-        fn _vrndn_f64(a: float64x1_t) -> float64x1_t;
+        fn _vrsqrtsd_f64(a: f64, b: f64) -> f64;
     }
-    unsafe { _vrndn_f64(a) }
+    unsafe { _vrsqrtsd_f64(a, b) }
 }
-#[doc = "Floating-point round to integral, to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtss_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintn))]
-pub fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
+pub fn vrsqrtss_f32(a: f32, b: f32) -> f32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.roundeven.v2f64"
+            link_name = "llvm.aarch64.neon.frsqrts.f32"
         )]
-        fn _vrndnq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vrsqrtss_f32(a: f32, b: f32) -> f32;
     }
-    unsafe { _vrndnq_f64(a) }
+    unsafe { _vrsqrtss_f32(a, b) }
 }
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnh_f16)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsh_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
+#[cfg_attr(test, assert_instr(frsqrts))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintn))]
-pub fn vrndnh_f16(a: f16) -> f16 {
+pub fn vrsqrtsh_f16(a: f16, b: f16) -> f16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.roundeven.f16"
+            link_name = "llvm.aarch64.neon.frsqrts.f16"
         )]
-        fn _vrndnh_f16(a: f16) -> f16;
+        fn _vrsqrtsh_f16(a: f16, b: f16) -> f16;
     }
-    unsafe { _vrndnh_f16(a) }
+    unsafe { _vrsqrtsh_f16(a, b) }
 }
-#[doc = "Floating-point round to integral, to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"]
+#[doc = "Signed rounding shift right and accumulate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintn))]
-pub fn vrndns_f32(a: f32) -> f32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.roundeven.f32"
-        )]
-        fn _vrndns_f32(a: f32) -> f32;
-    }
-    unsafe { _vrndns_f32(a) }
-}
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintp))]
-pub fn vrndp_f16(a: float16x4_t) -> float16x4_t {
-    unsafe { simd_ceil(a) }
-}
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintp))]
-pub fn vrndpq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe { simd_ceil(a) }
+pub fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+    static_assert!(N >= 1 && N <= 64);
+    let b: i64 = vrshrd_n_s64::<N>(b);
+    a.wrapping_add(b)
 }
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"]
+#[doc = "Unsigned rounding shift right and accumulate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintp))]
-pub fn vrndp_f32(a: float32x2_t) -> float32x2_t {
-    unsafe { simd_ceil(a) }
+pub fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+    static_assert!(N >= 1 && N <= 64);
+    let b: u64 = vrshrd_n_u64::<N>(b);
+    a.wrapping_add(b)
 }
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f32)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintp))]
-pub fn vrndpq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe { simd_ceil(a) }
+pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    vcombine_s8(a, vrsubhn_s16(b, c))
 }
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintp))]
-pub fn vrndp_f64(a: float64x1_t) -> float64x1_t {
-    unsafe { simd_ceil(a) }
+pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    vcombine_s16(a, vrsubhn_s32(b, c))
 }
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintp))]
-pub fn vrndpq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe { simd_ceil(a) }
-}
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndph_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintp))]
-pub fn vrndph_f16(a: f16) -> f16 {
-    ceilf16(a)
-}
-#[doc = "Floating-point round to integral exact, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintx))]
-pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
-    unsafe { simd_round_ties_even(a) }
-}
-#[doc = "Floating-point round to integral exact, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintx))]
-pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe { simd_round_ties_even(a) }
+pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    vcombine_s32(a, vrsubhn_s64(b, c))
 }
-#[doc = "Floating-point round to integral exact, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintx))]
-pub fn vrndx_f32(a: float32x2_t) -> float32x2_t {
-    unsafe { simd_round_ties_even(a) }
+pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+    vcombine_u8(a, vrsubhn_u16(b, c))
 }
-#[doc = "Floating-point round to integral exact, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintx))]
-pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe { simd_round_ties_even(a) }
+pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+    vcombine_u16(a, vrsubhn_u32(b, c))
 }
-#[doc = "Floating-point round to integral exact, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintx))]
-pub fn vrndx_f64(a: float64x1_t) -> float64x1_t {
-    unsafe { simd_round_ties_even(a) }
+pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+    vcombine_u32(a, vrsubhn_u64(b, c))
 }
-#[doc = "Floating-point round to integral exact, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_endian = "big")]
+#[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintx))]
-pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe { simd_round_ties_even(a) }
-}
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(frintx))]
-pub fn vrndxh_f16(a: f16) -> f16 {
-    round_ties_even_f16(a)
-}
-#[doc = "Signed rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(srshl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshld_s64(a: i64, b: i64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srshl.i64"
-        )]
-        fn _vrshld_s64(a: i64, b: i64) -> i64;
-    }
-    unsafe { _vrshld_s64(a, b) }
-}
-#[doc = "Unsigned rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(urshl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshld_u64(a: u64, b: i64) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urshl.i64"
-        )]
-        fn _vrshld_u64(a: u64, b: i64) -> u64;
-    }
-    unsafe { _vrshld_u64(a, b) }
-}
-#[doc = "Signed rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
-    static_assert!(N >= 1 && N <= 64);
-    vrshld_s64(a, -N as i64)
-}
-#[doc = "Unsigned rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
-    static_assert!(N >= 1 && N <= 64);
-    vrshld_u64(a, -N as i64)
-}
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    vcombine_s8(a, vrshrn_n_s16::<N>(b))
+pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    vcombine_s8(a, vrsubhn_s16(b, c))
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_endian = "big")]
+#[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    vcombine_s16(a, vrshrn_n_s32::<N>(b))
+pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    vcombine_s16(a, vrsubhn_s32(b, c))
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_endian = "big")]
+#[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    vcombine_s32(a, vrshrn_n_s64::<N>(b))
+pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    vcombine_s32(a, vrsubhn_s64(b, c))
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_endian = "big")]
+#[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    vcombine_u8(a, vrshrn_n_u16::<N>(b))
+pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+    vcombine_u8(a, vrsubhn_u16(b, c))
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_endian = "big")]
+#[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    vcombine_u16(a, vrshrn_n_u32::<N>(b))
+pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+    vcombine_u16(a, vrsubhn_u32(b, c))
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_endian = "big")]
+#[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    vcombine_u32(a, vrshrn_n_u64::<N>(b))
+pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+    vcombine_u32(a, vrsubhn_u64(b, c))
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"]
+#[doc = "Multi-vector floating-point adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrte))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsqrte_f64(a: float64x1_t) -> float64x1_t {
+#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+#[target_feature(enable = "neon,fp8")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
+pub fn vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.v1f64"
+            link_name = "llvm.aarch64.neon.fp8.fscale.v4f16"
         )]
-        fn _vrsqrte_f64(a: float64x1_t) -> float64x1_t;
+        fn _vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t;
     }
-    unsafe { _vrsqrte_f64(a) }
+    unsafe { _vscale_f16(vn, vm) }
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f64)"]
+#[doc = "Multi-vector floating-point adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrte))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
+#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+#[target_feature(enable = "neon,fp8")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
+pub fn vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.v2f64"
+            link_name = "llvm.aarch64.neon.fp8.fscale.v8f16"
         )]
-        fn _vrsqrteq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t;
     }
-    unsafe { _vrsqrteq_f64(a) }
+    unsafe { _vscaleq_f16(vn, vm) }
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrted_f64)"]
+#[doc = "Multi-vector floating-point adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrte))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsqrted_f64(a: f64) -> f64 {
+#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+#[target_feature(enable = "neon,fp8")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
+pub fn vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.f64"
+            link_name = "llvm.aarch64.neon.fp8.fscale.v2f32"
         )]
-        fn _vrsqrted_f64(a: f64) -> f64;
+        fn _vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t;
     }
-    unsafe { _vrsqrted_f64(a) }
+    unsafe { _vscale_f32(vn, vm) }
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtes_f32)"]
+#[doc = "Multi-vector floating-point adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrte))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsqrtes_f32(a: f32) -> f32 {
+#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+#[target_feature(enable = "neon,fp8")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
+pub fn vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.f32"
+            link_name = "llvm.aarch64.neon.fp8.fscale.v4f32"
         )]
-        fn _vrsqrtes_f32(a: f32) -> f32;
+        fn _vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t;
     }
-    unsafe { _vrsqrtes_f32(a) }
+    unsafe { _vscaleq_f32(vn, vm) }
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteh_f16)"]
+#[doc = "Multi-vector floating-point adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f64)"]
 #[inline]
-#[cfg_attr(test, assert_instr(frsqrte))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrsqrteh_f16(a: f16) -> f16 {
+#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+#[target_feature(enable = "neon,fp8")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
+pub fn vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.f16"
+            link_name = "llvm.aarch64.neon.fp8.fscale.v2f64"
         )]
-        fn _vrsqrteh_f16(a: f16) -> f16;
+        fn _vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t;
     }
-    unsafe { _vrsqrteh_f16(a) }
+    unsafe { _vscaleq_f64(vn, vm) }
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrts))]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.v1f64"
-        )]
-        fn _vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
-    }
-    unsafe { _vrsqrts_f64(a, b) }
+pub fn vset_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> float64x1_t {
+    static_assert!(LANE == 0);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f64)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrts))]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+pub fn vsetq_lane_f64<const LANE: i32>(a: f64, b: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_insert!(b, LANE as u32, a) }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vsetq_lane_f64<const LANE: i32>(a: f64, b: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe {
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "SHA512 hash update part 2"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512h2))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.v2f64"
+            link_name = "llvm.aarch64.crypto.sha512h2"
         )]
-        fn _vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+        fn _vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
     }
-    unsafe { _vrsqrtsq_f64(a, b) }
+    unsafe { _vsha512h2q_u64(a, b, c) }
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsd_f64)"]
+#[doc = "SHA512 hash update part 2"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrts))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsqrtsd_f64(a: f64, b: f64) -> f64 {
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512h2))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.f64"
+            link_name = "llvm.aarch64.crypto.sha512h2"
         )]
-        fn _vrsqrtsd_f64(a: f64, b: f64) -> f64;
+        fn _vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+    }
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: uint64x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: uint64x2_t = _vsha512h2q_u64(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
-    unsafe { _vrsqrtsd_f64(a, b) }
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtss_f32)"]
+#[doc = "SHA512 hash update part 1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrts))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsqrtss_f32(a: f32, b: f32) -> f32 {
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512h))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.f32"
+            link_name = "llvm.aarch64.crypto.sha512h"
         )]
-        fn _vrsqrtss_f32(a: f32, b: f32) -> f32;
+        fn _vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
     }
-    unsafe { _vrsqrtss_f32(a, b) }
+    unsafe { _vsha512hq_u64(a, b, c) }
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsh_f16)"]
+#[doc = "SHA512 hash update part 1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(test, assert_instr(frsqrts))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrsqrtsh_f16(a: f16, b: f16) -> f16 {
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512h))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.f16"
+            link_name = "llvm.aarch64.crypto.sha512h"
         )]
-        fn _vrsqrtsh_f16(a: f16, b: f16) -> f16;
+        fn _vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+    }
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: uint64x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: uint64x2_t = _vsha512hq_u64(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
-    unsafe { _vrsqrtsh_f16(a, b) }
 }
-#[doc = "Signed rounding shift right and accumulate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"]
+#[doc = "SHA512 schedule update 0"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
-    static_assert!(N >= 1 && N <= 64);
-    let b: i64 = vrshrd_n_s64::<N>(b);
-    a.wrapping_add(b)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512su0))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512su0"
+        )]
+        fn _vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
+    }
+    unsafe { _vsha512su0q_u64(a, b) }
 }
-#[doc = "Unsigned rounding shift right and accumulate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_u64)"]
+#[doc = "SHA512 schedule update 0"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
-    static_assert!(N >= 1 && N <= 64);
-    let b: u64 = vrshrd_n_u64::<N>(b);
-    a.wrapping_add(b)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512su0))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512su0"
+        )]
+        fn _vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
+    }
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint64x2_t = _vsha512su0q_u64(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
+#[doc = "SHA512 schedule update 1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_endian = "little")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    vcombine_s8(a, vrsubhn_s16(b, c))
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512su1))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512su1"
+        )]
+        fn _vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+    }
+    unsafe { _vsha512su1q_u64(a, b, c) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
+#[doc = "SHA512 schedule update 1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_endian = "little")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    vcombine_s16(a, vrsubhn_s32(b, c))
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512su1))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512su1"
+        )]
+        fn _vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+    }
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let c: uint64x2_t = simd_shuffle!(c, c, [1, 0]);
+        let ret_val: uint64x2_t = _vsha512su1q_u64(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_endian = "little")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(test, assert_instr(sshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    vcombine_s32(a, vrsubhn_s64(b, c))
+pub fn vshld_s64(a: i64, b: i64) -> i64 {
+    unsafe { transmute(vshl_s64(transmute(a), transmute(b))) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_endian = "little")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(test, assert_instr(ushl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    vcombine_u8(a, vrsubhn_u16(b, c))
+pub fn vshld_u64(a: u64, b: i64) -> u64 {
+    unsafe { transmute(vshl_u64(transmute(a), transmute(b))) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_endian = "little")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    vcombine_u16(a, vrsubhn_u32(b, c))
+pub fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
+    static_assert!(N >= 0 && N <= 8);
+    let b = vget_high_s8(a);
+    vshll_n_s8::<N>(b)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_endian = "little")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    vcombine_u32(a, vrsubhn_u64(b, c))
+pub fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
+    static_assert!(N >= 0 && N <= 16);
+    let b = vget_high_s16(a);
+    vshll_n_s16::<N>(b)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_endian = "big")]
-#[cfg_attr(test, assert_instr(rsubhn))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    vcombine_s8(a, vrsubhn_s16(b, c))
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
+    static_assert!(N >= 0 && N <= 32);
+    let b = vget_high_s32(a);
+    vshll_n_s32::<N>(b)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_endian = "big")]
-#[cfg_attr(test, assert_instr(rsubhn))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    vcombine_s16(a, vrsubhn_s32(b, c))
+pub fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
+    static_assert!(N >= 0 && N <= 8);
+    let b: uint8x8_t = vget_high_u8(a);
+    vshll_n_u8::<N>(b)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_endian = "big")]
-#[cfg_attr(test, assert_instr(rsubhn))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    vcombine_s32(a, vrsubhn_s64(b, c))
+pub fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
+    static_assert!(N >= 0 && N <= 16);
+    let b: uint16x4_t = vget_high_u16(a);
+    vshll_n_u16::<N>(b)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_endian = "big")]
-#[cfg_attr(test, assert_instr(rsubhn))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    vcombine_u8(a, vrsubhn_u16(b, c))
+pub fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
+    static_assert!(N >= 0 && N <= 32);
+    let b: uint32x2_t = vget_high_u32(a);
+    vshll_n_u32::<N>(b)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_endian = "big")]
-#[cfg_attr(test, assert_instr(rsubhn))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    vcombine_u16(a, vrsubhn_u32(b, c))
+pub fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    vcombine_s8(a, vshrn_n_s16::<N>(b))
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_endian = "big")]
-#[cfg_attr(test, assert_instr(rsubhn))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    vcombine_u32(a, vrsubhn_u64(b, c))
+pub fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    vcombine_s16(a, vshrn_n_s32::<N>(b))
 }
-#[doc = "Multi-vector floating-point adjust exponent"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f16)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"]
 #[inline]
-#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
-#[target_feature(enable = "neon,fp8")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
-pub fn vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fp8.fscale.v4f16"
-        )]
-        fn _vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t;
-    }
-    unsafe { _vscale_f16(vn, vm) }
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    vcombine_s32(a, vshrn_n_s64::<N>(b))
 }
-#[doc = "Multi-vector floating-point adjust exponent"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f16)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"]
 #[inline]
-#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
-#[target_feature(enable = "neon,fp8")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
-pub fn vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    vcombine_u8(a, vshrn_n_u16::<N>(b))
+}
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    vcombine_u16(a, vshrn_n_u32::<N>(b))
+}
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    vcombine_u32(a, vshrn_n_u64::<N>(b))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fp8.fscale.v8f16"
+            link_name = "llvm.aarch64.neon.vsli.v8i8"
         )]
-        fn _vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t;
+        fn _vsli_n_s8(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t;
     }
-    unsafe { _vscaleq_f16(vn, vm) }
+    unsafe { _vsli_n_s8(a, b, N) }
 }
-#[doc = "Multi-vector floating-point adjust exponent"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"]
 #[inline]
-#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
-#[target_feature(enable = "neon,fp8")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
-pub fn vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fp8.fscale.v2f32"
+            link_name = "llvm.aarch64.neon.vsli.v16i8"
         )]
-        fn _vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t;
+        fn _vsliq_n_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t;
     }
-    unsafe { _vscale_f32(vn, vm) }
+    unsafe { _vsliq_n_s8(a, b, N) }
 }
-#[doc = "Multi-vector floating-point adjust exponent"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"]
 #[inline]
-#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
-#[target_feature(enable = "neon,fp8")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
-pub fn vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 4);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fp8.fscale.v4f32"
+            link_name = "llvm.aarch64.neon.vsli.v4i16"
         )]
-        fn _vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t;
+        fn _vsli_n_s16(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t;
     }
-    unsafe { _vscaleq_f32(vn, vm) }
+    unsafe { _vsli_n_s16(a, b, N) }
 }
-#[doc = "Multi-vector floating-point adjust exponent"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"]
 #[inline]
-#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
-#[target_feature(enable = "neon,fp8")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
-pub fn vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 4);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fp8.fscale.v2f64"
+            link_name = "llvm.aarch64.neon.vsli.v8i16"
         )]
-        fn _vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t;
+        fn _vsliq_n_s16(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t;
     }
-    unsafe { _vscaleq_f64(vn, vm) }
+    unsafe { _vsliq_n_s16(a, b, N) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vset_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> float64x1_t {
-    static_assert!(LANE == 0);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N >= 0 && N <= 31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vsli.v2i32"
+        )]
+        fn _vsli_n_s32(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t;
+    }
+    unsafe { _vsli_n_s32(a, b, N) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsetq_lane_f64<const LANE: i32>(a: f64, b: float64x2_t) -> float64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_insert!(b, LANE as u32, a) }
-}
-#[doc = "SHA512 hash update part 2"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"]
-#[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512h2))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+pub fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N >= 0 && N <= 31);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha512h2"
+            link_name = "llvm.aarch64.neon.vsli.v4i32"
         )]
-        fn _vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+        fn _vsliq_n_s32(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t;
     }
-    unsafe { _vsha512h2q_u64(a, b, c) }
-}
-#[doc = "SHA512 hash update part 1"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"]
-#[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512h))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha512h"
-        )]
-        fn _vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
-    }
-    unsafe { _vsha512hq_u64(a, b, c) }
+    unsafe { _vsliq_n_s32(a, b, N) }
 }
-#[doc = "SHA512 schedule update 0"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512su0))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(N >= 0 && N <= 63);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha512su0"
+            link_name = "llvm.aarch64.neon.vsli.v1i64"
         )]
-        fn _vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
+        fn _vsli_n_s64(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t;
     }
-    unsafe { _vsha512su0q_u64(a, b) }
+    unsafe { _vsli_n_s64(a, b, N) }
 }
-#[doc = "SHA512 schedule update 1"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512su1))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(N >= 0 && N <= 63);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha512su1"
+            link_name = "llvm.aarch64.neon.vsli.v2i64"
         )]
-        fn _vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+        fn _vsliq_n_s64(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
     }
-    unsafe { _vsha512su1q_u64(a, b, c) }
+    unsafe { _vsliq_n_s64(a, b, N) }
 }
-#[doc = "Signed Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_s64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshl))]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshld_s64(a: i64, b: i64) -> i64 {
-    unsafe { transmute(vshl_s64(transmute(a), transmute(b))) }
+pub fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { transmute(vsli_n_s8::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Unsigned Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_u64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushl))]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshld_u64(a: u64, b: i64) -> u64 {
-    unsafe { transmute(vshl_u64(transmute(a), transmute(b))) }
+pub fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { transmute(vsliq_n_s8::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
-    static_assert!(N >= 0 && N <= 8);
-    let b = vget_high_s8(a);
-    vshll_n_s8::<N>(b)
+pub fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { transmute(vsli_n_s16::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
-    static_assert!(N >= 0 && N <= 16);
-    let b = vget_high_s16(a);
-    vshll_n_s16::<N>(b)
+pub fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { transmute(vsliq_n_s16::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
-    static_assert!(N >= 0 && N <= 32);
-    let b = vget_high_s32(a);
-    vshll_n_s32::<N>(b)
+pub fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(N >= 0 && N <= 31);
+    unsafe { transmute(vsli_n_s32::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
-    static_assert!(N >= 0 && N <= 8);
-    let b: uint8x8_t = vget_high_u8(a);
-    vshll_n_u8::<N>(b)
+pub fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N >= 0 && N <= 31);
+    unsafe { transmute(vsliq_n_s32::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
-    static_assert!(N >= 0 && N <= 16);
-    let b: uint16x4_t = vget_high_u16(a);
-    vshll_n_u16::<N>(b)
+pub fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 0 && N <= 63);
+    unsafe { transmute(vsli_n_s64::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
-    static_assert!(N >= 0 && N <= 32);
-    let b: uint32x2_t = vget_high_u32(a);
-    vshll_n_u32::<N>(b)
+pub fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 0 && N <= 63);
+    unsafe { transmute(vsliq_n_s64::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    vcombine_s8(a, vshrn_n_s16::<N>(b))
+pub fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { transmute(vsli_n_s8::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    vcombine_s16(a, vshrn_n_s32::<N>(b))
+pub fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { transmute(vsliq_n_s8::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    vcombine_s32(a, vshrn_n_s64::<N>(b))
+pub fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { transmute(vsli_n_s16::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    vcombine_u8(a, vshrn_n_u16::<N>(b))
+pub fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { transmute(vsliq_n_s16::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    vcombine_u16(a, vshrn_n_u32::<N>(b))
+pub fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+    static_assert!(N >= 0 && N <= 63);
+    unsafe { transmute(vsli_n_s64::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    vcombine_u32(a, vshrn_n_u64::<N>(b))
+pub fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    static_assert!(N >= 0 && N <= 63);
+    unsafe { transmute(vsliq_n_s64::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"]
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe extern "unadjusted" {
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))]
+pub fn vslid_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+    static_assert!(N >= 0 && N <= 63);
+    unsafe { transmute(vsli_n_s64::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))]
+pub fn vslid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+    static_assert!(N >= 0 && N <= 63);
+    unsafe { transmute(vsli_n_u64::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "SM3PARTW1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3partw1))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsli.v8i8"
+            link_name = "llvm.aarch64.crypto.sm3partw1"
         )]
-        fn _vsli_n_s8(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t;
+        fn _vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
     }
-    unsafe { _vsli_n_s8(a, b, N) }
+    unsafe { _vsm3partw1q_u32(a, b, c) }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"]
+#[doc = "SM3PARTW1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3partw1))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsli.v16i8"
+            link_name = "llvm.aarch64.crypto.sm3partw1"
         )]
-        fn _vsliq_n_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t;
+        fn _vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsm3partw1q_u32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vsliq_n_s8(a, b, N) }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"]
+#[doc = "SM3PARTW2"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 4);
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3partw2))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsli.v4i16"
+            link_name = "llvm.aarch64.crypto.sm3partw2"
         )]
-        fn _vsli_n_s16(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t;
+        fn _vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
     }
-    unsafe { _vsli_n_s16(a, b, N) }
+    unsafe { _vsm3partw2q_u32(a, b, c) }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"]
+#[doc = "SM3PARTW2"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 4);
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3partw2))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsli.v8i16"
+            link_name = "llvm.aarch64.crypto.sm3partw2"
         )]
-        fn _vsliq_n_s16(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t;
+        fn _vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsm3partw2q_u32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vsliq_n_s16(a, b, N) }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"]
+#[doc = "SM3SS1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 0 && N <= 31);
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3ss1))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsli.v2i32"
+            link_name = "llvm.aarch64.crypto.sm3ss1"
         )]
-        fn _vsli_n_s32(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t;
+        fn _vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
     }
-    unsafe { _vsli_n_s32(a, b, N) }
+    unsafe { _vsm3ss1q_u32(a, b, c) }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"]
+#[doc = "SM3SS1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 0 && N <= 31);
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3ss1))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsli.v4i32"
+            link_name = "llvm.aarch64.crypto.sm3ss1"
         )]
-        fn _vsliq_n_s32(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t;
+        fn _vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsm3ss1q_u32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vsliq_n_s32(a, b, N) }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"]
+#[doc = "SM3TT1A"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1aq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N >= 0 && N <= 63);
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm3tt1aq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(IMM2, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsli.v1i64"
+            link_name = "llvm.aarch64.crypto.sm3tt1a"
         )]
-        fn _vsli_n_s64(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t;
+        fn _vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
     }
-    unsafe { _vsli_n_s64(a, b, N) }
+    unsafe { _vsm3tt1aq_u32(a, b, c, IMM2 as i64) }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"]
+#[doc = "SM3TT1A"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1aq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(N >= 0 && N <= 63);
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm3tt1aq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(IMM2, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsli.v2i64"
+            link_name = "llvm.aarch64.crypto.sm3tt1a"
         )]
-        fn _vsliq_n_s64(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
+        fn _vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsm3tt1aq_u32(a, b, c, IMM2 as i64);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vsliq_n_s64(a, b, N) }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"]
+#[doc = "SM3TT1B"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1bq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { transmute(vsli_n_s8::<N>(transmute(a), transmute(b))) }
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm3tt1bq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(IMM2, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sm3tt1b"
+        )]
+        fn _vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
+    }
+    unsafe { _vsm3tt1bq_u32(a, b, c, IMM2 as i64) }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"]
+#[doc = "SM3TT1B"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1bq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { transmute(vsliq_n_s8::<N>(transmute(a), transmute(b))) }
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm3tt1bq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(IMM2, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sm3tt1b"
+        )]
+        fn _vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsm3tt1bq_u32(a, b, c, IMM2 as i64);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { transmute(vsli_n_s16::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { transmute(vsliq_n_s16::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 0 && N <= 31);
-    unsafe { transmute(vsli_n_s32::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 0 && N <= 31);
-    unsafe { transmute(vsliq_n_s32::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 0 && N <= 63);
-    unsafe { transmute(vsli_n_s64::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(N >= 0 && N <= 63);
-    unsafe { transmute(vsliq_n_s64::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { transmute(vsli_n_s8::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { transmute(vsliq_n_s8::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { transmute(vsli_n_s16::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { transmute(vsliq_n_s16::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
-    static_assert!(N >= 0 && N <= 63);
-    unsafe { transmute(vsli_n_s64::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    static_assert!(N >= 0 && N <= 63);
-    unsafe { transmute(vsliq_n_s64::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift left and insert"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))]
-pub fn vslid_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
-    static_assert!(N >= 0 && N <= 63);
-    unsafe { transmute(vsli_n_s64::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift left and insert"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))]
-pub fn vslid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
-    static_assert!(N >= 0 && N <= 63);
-    unsafe { transmute(vsli_n_u64::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "SM3PARTW1"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"]
-#[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3partw1))]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3partw1"
-        )]
-        fn _vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vsm3partw1q_u32(a, b, c) }
-}
-#[doc = "SM3PARTW2"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"]
-#[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3partw2))]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3partw2"
-        )]
-        fn _vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vsm3partw2q_u32(a, b, c) }
-}
-#[doc = "SM3SS1"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"]
-#[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3ss1))]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3ss1"
-        )]
-        fn _vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vsm3ss1q_u32(a, b, c) }
-}
-#[doc = "SM3TT1A"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1aq_u32)"]
+#[doc = "SM3TT2A"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2aq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))]
+#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub fn vsm3tt1aq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+pub fn vsm3tt2aq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(IMM2, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3tt1a"
+            link_name = "llvm.aarch64.crypto.sm3tt2a"
         )]
-        fn _vsm3tt1aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
+        fn _vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
     }
-    unsafe { _vsm3tt1aq_u32(a, b, c, IMM2 as i64) }
+    unsafe { _vsm3tt2aq_u32(a, b, c, IMM2 as i64) }
 }
-#[doc = "SM3TT1B"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1bq_u32)"]
+#[doc = "SM3TT2A"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2aq_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))]
+#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub fn vsm3tt1bq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+pub fn vsm3tt2aq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(IMM2, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3tt1b"
+            link_name = "llvm.aarch64.crypto.sm3tt2a"
         )]
-        fn _vsm3tt1bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
+        fn _vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsm3tt2aq_u32(a, b, c, IMM2 as i64);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vsm3tt1bq_u32(a, b, c, IMM2 as i64) }
 }
-#[doc = "SM3TT2A"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2aq_u32)"]
+#[doc = "SM3TT2B"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2bq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))]
+#[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub fn vsm3tt2aq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+pub fn vsm3tt2bq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(IMM2, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3tt2a"
+            link_name = "llvm.aarch64.crypto.sm3tt2b"
         )]
-        fn _vsm3tt2aq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
+        fn _vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
     }
-    unsafe { _vsm3tt2aq_u32(a, b, c, IMM2 as i64) }
+    unsafe { _vsm3tt2bq_u32(a, b, c, IMM2 as i64) }
 }
 #[doc = "SM3TT2B"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2bq_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,sm4")]
 #[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))]
 #[rustc_legacy_const_generics(3)]
@@ -20650,11 +24018,18 @@ pub fn vsm3tt2bq_u32<const IMM2: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_
         )]
         fn _vsm3tt2bq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, n: i64) -> uint32x4_t;
     }
-    unsafe { _vsm3tt2bq_u32(a, b, c, IMM2 as i64) }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let c: uint32x4_t = simd_shuffle!(c, c, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsm3tt2bq_u32(a, b, c, IMM2 as i64);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "SM4 key"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,sm4")]
 #[cfg_attr(test, assert_instr(sm4ekey))]
 #[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
@@ -20668,9 +24043,32 @@ pub fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     }
     unsafe { _vsm4ekeyq_u32(a, b) }
 }
-#[doc = "SM4 encode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"]
+#[doc = "SM4 key"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm4ekey))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sm4ekey"
+        )]
+        fn _vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsm4ekeyq_u32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "SM4 encode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"]
+#[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,sm4")]
 #[cfg_attr(test, assert_instr(sm4e))]
 #[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
@@ -20684,6 +24082,28 @@ pub fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     }
     unsafe { _vsm4eq_u32(a, b) }
 }
+#[doc = "SM4 encode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm4e))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sm4e"
+        )]
+        fn _vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsm4eq_u32(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
 #[doc = "Unsigned saturating Accumulate of Signed value."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u8)"]
 #[inline]
@@ -22526,2432 +25946,5222 @@ pub unsafe fn vstl1q_lane_s64<const LANE: i32>(ptr: *mut i64, val: int64x2_t) {
     let lane: i64 = vgetq_lane_s64::<LANE>(val);
     (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release)
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)"]
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn vsubd_s64(a: i64, b: i64) -> i64 {
+    a.wrapping_sub(b)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn vsubd_u64(a: u64, b: u64) -> u64 {
+    a.wrapping_sub(b)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubh_f16)"]
+#[inline]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn vsubh_f16(a: f16, b: f16) -> f16 {
+    a - b
+}
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
+pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+    unsafe {
+        let c: int16x8_t = simd_cast(vget_high_s8(a));
+        let d: int16x8_t = simd_cast(vget_high_s8(b));
+        simd_sub(c, d)
+    }
+}
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
+pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    unsafe {
+        let c: int32x4_t = simd_cast(vget_high_s16(a));
+        let d: int32x4_t = simd_cast(vget_high_s16(b));
+        simd_sub(c, d)
+    }
+}
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
+pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    unsafe {
+        let c: int64x2_t = simd_cast(vget_high_s32(a));
+        let d: int64x2_t = simd_cast(vget_high_s32(b));
+        simd_sub(c, d)
+    }
+}
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
+pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
+    unsafe {
+        let c: uint16x8_t = simd_cast(vget_high_u8(a));
+        let d: uint16x8_t = simd_cast(vget_high_u8(b));
+        simd_sub(c, d)
+    }
+}
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
+pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+    unsafe {
+        let c: uint32x4_t = simd_cast(vget_high_u16(a));
+        let d: uint32x4_t = simd_cast(vget_high_u16(b));
+        simd_sub(c, d)
+    }
+}
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
+pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+    unsafe {
+        let c: uint64x2_t = simd_cast(vget_high_u32(a));
+        let d: uint64x2_t = simd_cast(vget_high_u32(b));
+        simd_sub(c, d)
+    }
+}
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
+pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
+    let c = vget_high_s8(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
+}
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
+pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
+    let c = vget_high_s16(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
+}
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
+pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
+    let c = vget_high_s32(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
+}
+#[doc = "Unsigned Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
+pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
+    let c = vget_high_u8(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
+}
+#[doc = "Unsigned Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
+pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
+    let c = vget_high_u16(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
+}
+#[doc = "Unsigned Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
+pub fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
+    let c = vget_high_u32(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    vqtbl1_s8(vcombine_s8(a, unsafe { crate::mem::zeroed() }), unsafe {
+        {
+            transmute(b)
+        }
+    })
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    vqtbl1_u8(vcombine_u8(a, unsafe { crate::mem::zeroed() }), b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
+    vqtbl1_p8(vcombine_p8(a, unsafe { crate::mem::zeroed() }), b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
+    vqtbl1_s8(vcombine_s8(a.0, a.1), vreinterpret_u8_s8(b))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
+    vqtbl1_u8(vcombine_u8(a.0, a.1), b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
+    vqtbl1_p8(vcombine_p8(a.0, a.1), b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
+    let x = int8x16x2_t(
+        vcombine_s8(a.0, a.1),
+        vcombine_s8(a.2, unsafe { crate::mem::zeroed() }),
+    );
+    vqtbl2_s8(x, vreinterpret_u8_s8(b))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
+    let x = uint8x16x2_t(
+        vcombine_u8(a.0, a.1),
+        vcombine_u8(a.2, unsafe { crate::mem::zeroed() }),
+    );
+    vqtbl2_u8(x, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
+    let x = poly8x16x2_t(
+        vcombine_p8(a.0, a.1),
+        vcombine_p8(a.2, unsafe { crate::mem::zeroed() }),
+    );
+    vqtbl2_p8(x, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
+    let x = int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3));
+    vqtbl2_s8(x, vreinterpret_u8_s8(b))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
+    let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3));
+    vqtbl2_u8(x, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
+    let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3));
+    vqtbl2_p8(x, b)
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    unsafe {
+        simd_select(
+            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(8))),
+            vqtbx1_s8(
+                a,
+                vcombine_s8(b, crate::mem::zeroed()),
+                vreinterpret_u8_s8(c),
+            ),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
+            vqtbx1_u8(a, vcombine_u8(b, crate::mem::zeroed()), c),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
+    unsafe {
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
+            vqtbx1_p8(a, vcombine_p8(b, crate::mem::zeroed()), c),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
+    unsafe {
+        simd_select(
+            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(16))),
+            vqtbx1_s8(a, vcombine_s8(b.0, b.1), vreinterpret_u8_s8(c)),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(16))),
+            vqtbx1_u8(a, vcombine_u8(b.0, b.1), c),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
+    unsafe {
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(16))),
+            vqtbx1_p8(a, vcombine_p8(b.0, b.1), c),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
+    let x = int8x16x2_t(
+        vcombine_s8(b.0, b.1),
+        vcombine_s8(b.2, unsafe { crate::mem::zeroed() }),
+    );
+    unsafe {
+        simd_select(
+            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(24))),
+            vqtbx2_s8(a, x, vreinterpret_u8_s8(c)),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
+    let x = uint8x16x2_t(
+        vcombine_u8(b.0, b.1),
+        vcombine_u8(b.2, unsafe { crate::mem::zeroed() }),
+    );
+    unsafe {
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(24))),
+            vqtbx2_u8(a, x, c),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
+    let x = poly8x16x2_t(
+        vcombine_p8(b.0, b.1),
+        vcombine_p8(b.2, unsafe { crate::mem::zeroed() }),
+    );
+    unsafe {
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(24))),
+            vqtbx2_p8(a, x, c),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
+    let x = int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, b.3));
+    unsafe {
+        simd_select(
+            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(32))),
+            vqtbx2_s8(a, x, vreinterpret_u8_s8(c)),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
+    let x = uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, b.3));
+    unsafe {
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(32))),
+            vqtbx2_u8(a, x, c),
+            a,
+        )
+    }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
+    let x = poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, b.3));
+    unsafe {
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(32))),
+            vqtbx2_p8(a, x, c),
+            a,
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
+pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: poly64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+}
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
+pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtst_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
+    unsafe {
+        let c: int64x1_t = simd_and(a, b);
+        let d: i64x1 = i64x1::new(0);
+        simd_ne(c, transmute(d))
+    }
+}
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtstq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
+    unsafe {
+        let c: int64x2_t = simd_and(a, b);
+        let d: i64x2 = i64x2::new(0, 0);
+        simd_ne(c, transmute(d))
+    }
+}
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtst_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t {
+    unsafe {
+        let c: poly64x1_t = simd_and(a, b);
+        let d: i64x1 = i64x1::new(0);
+        simd_ne(c, transmute(d))
+    }
+}
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtstq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
+    unsafe {
+        let c: poly64x2_t = simd_and(a, b);
+        let d: i64x2 = i64x2::new(0, 0);
+        simd_ne(c, transmute(d))
+    }
+}
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vtst_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe {
+        let c: uint64x1_t = simd_and(a, b);
+        let d: u64x1 = u64x1::new(0);
+        simd_ne(c, transmute(d))
+    }
+}
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fsub))]
-pub fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe {
+        let c: uint64x2_t = simd_and(a, b);
+        let d: u64x2 = u64x2::new(0, 0);
+        simd_ne(c, transmute(d))
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f64)"]
+#[doc = "Compare bitwise test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fsub))]
-pub fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtstd_s64(a: i64, b: i64) -> u64 {
+    unsafe { transmute(vtst_s64(transmute(a), transmute(b))) }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_s64)"]
+#[doc = "Compare bitwise test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sub))]
-pub fn vsubd_s64(a: i64, b: i64) -> i64 {
-    a.wrapping_sub(b)
+pub fn vtstd_u64(a: u64, b: u64) -> u64 {
+    unsafe { transmute(vtst_u64(transmute(a), transmute(b))) }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_u64)"]
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sub))]
-pub fn vsubd_u64(a: u64, b: u64) -> u64 {
-    a.wrapping_sub(b)
-}
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubh_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(fsub))]
-pub fn vsubh_f16(a: f16, b: f16) -> f16 {
-    a - b
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v8i8"
+        )]
+        fn _vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t;
+    }
+    unsafe { _vuqadd_s8(a, b) }
 }
-#[doc = "Signed Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"]
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
-pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
-    unsafe {
-        let c: int16x8_t = simd_cast(vget_high_s8(a));
-        let d: int16x8_t = simd_cast(vget_high_s8(b));
-        simd_sub(c, d)
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v16i8"
+        )]
+        fn _vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t;
     }
+    unsafe { _vuqaddq_s8(a, b) }
 }
-#[doc = "Signed Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s16)"]
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
-pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    unsafe {
-        let c: int32x4_t = simd_cast(vget_high_s16(a));
-        let d: int32x4_t = simd_cast(vget_high_s16(b));
-        simd_sub(c, d)
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v4i16"
+        )]
+        fn _vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t;
     }
+    unsafe { _vuqadd_s16(a, b) }
 }
-#[doc = "Signed Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s32)"]
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
-pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    unsafe {
-        let c: int64x2_t = simd_cast(vget_high_s32(a));
-        let d: int64x2_t = simd_cast(vget_high_s32(b));
-        simd_sub(c, d)
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v8i16"
+        )]
+        fn _vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t;
     }
+    unsafe { _vuqaddq_s16(a, b) }
 }
-#[doc = "Unsigned Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u8)"]
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
-pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let c: uint16x8_t = simd_cast(vget_high_u8(a));
-        let d: uint16x8_t = simd_cast(vget_high_u8(b));
-        simd_sub(c, d)
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v2i32"
+        )]
+        fn _vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t;
     }
+    unsafe { _vuqadd_s32(a, b) }
 }
-#[doc = "Unsigned Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u16)"]
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
-pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let c: uint32x4_t = simd_cast(vget_high_u16(a));
-        let d: uint32x4_t = simd_cast(vget_high_u16(b));
-        simd_sub(c, d)
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v4i32"
+        )]
+        fn _vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t;
     }
+    unsafe { _vuqaddq_s32(a, b) }
 }
-#[doc = "Unsigned Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u32)"]
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
-pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let c: uint64x2_t = simd_cast(vget_high_u32(a));
-        let d: uint64x2_t = simd_cast(vget_high_u32(b));
-        simd_sub(c, d)
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v1i64"
+        )]
+        fn _vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t;
     }
+    unsafe { _vuqadd_s64(a, b) }
 }
-#[doc = "Signed Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s8)"]
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
-pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
-    let c = vget_high_s8(b);
-    unsafe { simd_sub(a, simd_cast(c)) }
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v2i64"
+        )]
+        fn _vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t;
+    }
+    unsafe { _vuqaddq_s64(a, b) }
 }
-#[doc = "Signed Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"]
+#[doc = "Signed saturating accumulate of unsigned value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddb_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
-pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
-    let c = vget_high_s16(b);
-    unsafe { simd_sub(a, simd_cast(c)) }
+pub fn vuqaddb_s8(a: i8, b: u8) -> i8 {
+    vget_lane_s8::<0>(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)))
 }
-#[doc = "Signed Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"]
+#[doc = "Signed saturating accumulate of unsigned value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddh_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
-pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
-    let c = vget_high_s32(b);
-    unsafe { simd_sub(a, simd_cast(c)) }
+pub fn vuqaddh_s16(a: i16, b: u16) -> i16 {
+    vget_lane_s16::<0>(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)))
 }
-#[doc = "Unsigned Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"]
+#[doc = "Signed saturating accumulate of unsigned value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddd_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
-pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
-    let c = vget_high_u8(b);
-    unsafe { simd_sub(a, simd_cast(c)) }
+pub fn vuqaddd_s64(a: i64, b: u64) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.i64"
+        )]
+        fn _vuqaddd_s64(a: i64, b: u64) -> i64;
+    }
+    unsafe { _vuqaddd_s64(a, b) }
 }
-#[doc = "Unsigned Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"]
+#[doc = "Signed saturating accumulate of unsigned value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadds_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
-pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
-    let c = vget_high_u16(b);
-    unsafe { simd_sub(a, simd_cast(c)) }
+pub fn vuqadds_s32(a: i32, b: u32) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.i32"
+        )]
+        fn _vuqadds_s32(a: i32, b: u32) -> i32;
+    }
+    unsafe { _vuqadds_s32(a, b) }
 }
-#[doc = "Unsigned Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
-pub fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
-    let c = vget_high_u32(b);
-    unsafe { simd_sub(a, simd_cast(c)) }
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    vqtbl1_s8(vcombine_s8(a, unsafe { crate::mem::zeroed() }), unsafe {
-        {
-            transmute(b)
-        }
-    })
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    vqtbl1_u8(vcombine_u8(a, unsafe { crate::mem::zeroed() }), b)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
-    vqtbl1_p8(vcombine_p8(a, unsafe { crate::mem::zeroed() }), b)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
-    vqtbl1_s8(vcombine_s8(a.0, a.1), vreinterpret_u8_s8(b))
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
-    vqtbl1_u8(vcombine_u8(a.0, a.1), b)
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
-    vqtbl1_p8(vcombine_p8(a.0, a.1), b)
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
-    let x = int8x16x2_t(
-        vcombine_s8(a.0, a.1),
-        vcombine_s8(a.2, unsafe { crate::mem::zeroed() }),
-    );
-    vqtbl2_s8(x, vreinterpret_u8_s8(b))
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
-    let x = uint8x16x2_t(
-        vcombine_u8(a.0, a.1),
-        vcombine_u8(a.2, unsafe { crate::mem::zeroed() }),
-    );
-    vqtbl2_u8(x, b)
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
-    let x = poly8x16x2_t(
-        vcombine_p8(a.0, a.1),
-        vcombine_p8(a.2, unsafe { crate::mem::zeroed() }),
-    );
-    vqtbl2_p8(x, b)
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
-    let x = int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3));
-    vqtbl2_s8(x, vreinterpret_u8_s8(b))
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
-    let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3));
-    vqtbl2_u8(x, b)
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
-    let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3));
-    vqtbl2_p8(x, b)
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    unsafe {
-        simd_select(
-            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(8))),
-            vqtbx1_s8(
-                a,
-                vcombine_s8(b, crate::mem::zeroed()),
-                vreinterpret_u8_s8(c),
-            ),
-            a,
-        )
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
-            vqtbx1_u8(a, vcombine_u8(b, crate::mem::zeroed()), c),
-            a,
-        )
-    }
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe {
-        simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
-            vqtbx1_p8(a, vcombine_p8(b, crate::mem::zeroed()), c),
-            a,
-        )
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
-    unsafe {
-        simd_select(
-            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(16))),
-            vqtbx1_s8(a, vcombine_s8(b.0, b.1), vreinterpret_u8_s8(c)),
-            a,
-        )
-    }
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe {
-        simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(16))),
-            vqtbx1_u8(a, vcombine_u8(b.0, b.1), c),
-            a,
-        )
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
-    unsafe {
-        simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(16))),
-            vqtbx1_p8(a, vcombine_p8(b.0, b.1), c),
-            a,
-        )
-    }
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
-    let x = int8x16x2_t(
-        vcombine_s8(b.0, b.1),
-        vcombine_s8(b.2, unsafe { crate::mem::zeroed() }),
-    );
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe {
-        simd_select(
-            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(24))),
-            vqtbx2_s8(a, x, vreinterpret_u8_s8(c)),
-            a,
-        )
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
-    let x = uint8x16x2_t(
-        vcombine_u8(b.0, b.1),
-        vcombine_u8(b.2, unsafe { crate::mem::zeroed() }),
-    );
-    unsafe {
-        simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(24))),
-            vqtbx2_u8(a, x, c),
-            a,
-        )
-    }
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
-    let x = poly8x16x2_t(
-        vcombine_p8(b.0, b.1),
-        vcombine_p8(b.2, unsafe { crate::mem::zeroed() }),
-    );
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe {
-        simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(24))),
-            vqtbx2_p8(a, x, c),
-            a,
-        )
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
-    let x = int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, b.3));
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
-        simd_select(
-            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(32))),
-            vqtbx2_s8(a, x, vreinterpret_u8_s8(c)),
+        simd_shuffle!(
             a,
+            b,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
         )
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
-    let x = uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, b.3));
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
-        simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(32))),
-            vqtbx2_u8(a, x, c),
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = simd_shuffle!(
             a,
+            b,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
         )
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
-    let x = poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, b.3));
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
+}
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe {
-        simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(32))),
-            vqtbx2_p8(a, x, c),
-            a,
-        )
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        )
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
-        )
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
             a,
             b,
-            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        )
+    }
+}
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
+pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
         )
     }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(
-    all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
-)]
-pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
-}
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
-pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
-        )
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp2)
 )]
-pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
+pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn1)
+    assert_instr(uzp2)
 )]
-pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
+pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
-pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,fp16")]
 #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
-pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
-pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
-pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
-pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
-        )
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(
-    all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
-)]
-pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
-}
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
-pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
-pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
-pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
-pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
-        )
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: poly64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
-pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
-pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
-pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
-pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
+pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
-pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
             a,
             b,
-            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
         )
     }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
-pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
+pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Transpose vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
-pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
-}
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtst_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
-    unsafe {
-        let c: int64x1_t = simd_and(a, b);
-        let d: i64x1 = i64x1::new(0);
-        simd_ne(c, transmute(d))
-    }
+pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtstq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe {
-        let c: int64x2_t = simd_and(a, b);
-        let d: i64x2 = i64x2::new(0, 0);
-        simd_ne(c, transmute(d))
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtst_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t {
-    unsafe {
-        let c: poly64x1_t = simd_and(a, b);
-        let d: i64x1 = i64x1::new(0);
-        simd_ne(c, transmute(d))
-    }
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtstq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe {
-        let c: poly64x2_t = simd_and(a, b);
-        let d: i64x2 = i64x2::new(0, 0);
-        simd_ne(c, transmute(d))
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Unsigned compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtst_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe {
-        let c: uint64x1_t = simd_and(a, b);
-        let d: u64x1 = u64x1::new(0);
-        simd_ne(c, transmute(d))
-    }
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
-#[doc = "Unsigned compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe {
-        let c: uint64x2_t = simd_and(a, b);
-        let d: u64x2 = u64x2::new(0, 0);
-        simd_ne(c, transmute(d))
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Compare bitwise test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tst))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtstd_s64(a: i64, b: i64) -> u64 {
-    unsafe { transmute(vtst_s64(transmute(a), transmute(b))) }
-}
-#[doc = "Compare bitwise test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_u64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtstd_u64(a: u64, b: u64) -> u64 {
-    unsafe { transmute(vtst_u64(transmute(a), transmute(b))) }
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
-#[doc = "Signed saturating Accumulate of Unsigned value."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(suqadd))]
-pub fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.suqadd.v8i8"
-        )]
-        fn _vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t;
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
-    unsafe { _vuqadd_s8(a, b) }
 }
-#[doc = "Signed saturating Accumulate of Unsigned value."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(suqadd))]
-pub fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.suqadd.v16i8"
-        )]
-        fn _vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t;
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        )
     }
-    unsafe { _vuqaddq_s8(a, b) }
 }
-#[doc = "Signed saturating Accumulate of Unsigned value."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(suqadd))]
-pub fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.suqadd.v4i16"
-        )]
-        fn _vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t;
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
-    unsafe { _vuqadd_s16(a, b) }
 }
-#[doc = "Signed saturating Accumulate of Unsigned value."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(suqadd))]
-pub fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.suqadd.v8i16"
-        )]
-        fn _vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t;
-    }
-    unsafe { _vuqaddq_s16(a, b) }
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
-#[doc = "Signed saturating Accumulate of Unsigned value."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(suqadd))]
-pub fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.suqadd.v2i32"
-        )]
-        fn _vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t;
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vuqadd_s32(a, b) }
 }
-#[doc = "Signed saturating Accumulate of Unsigned value."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(suqadd))]
-pub fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.suqadd.v4i32"
-        )]
-        fn _vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t;
-    }
-    unsafe { _vuqaddq_s32(a, b) }
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
-#[doc = "Signed saturating Accumulate of Unsigned value."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(suqadd))]
-pub fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.suqadd.v1i64"
-        )]
-        fn _vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t;
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
-    unsafe { _vuqadd_s64(a, b) }
 }
-#[doc = "Signed saturating Accumulate of Unsigned value."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(suqadd))]
-pub fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.suqadd.v2i64"
-        )]
-        fn _vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t;
-    }
-    unsafe { _vuqaddq_s64(a, b) }
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
-#[doc = "Signed saturating accumulate of unsigned value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddb_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vuqaddb_s8(a: i8, b: u8) -> i8 {
-    vget_lane_s8::<0>(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)))
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Signed saturating accumulate of unsigned value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddh_s16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vuqaddh_s16(a: i16, b: u16) -> i16 {
-    vget_lane_s16::<0>(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)))
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
-#[doc = "Signed saturating accumulate of unsigned value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddd_s64)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vuqaddd_s64(a: i64, b: u64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.suqadd.i64"
-        )]
-        fn _vuqaddd_s64(a: i64, b: u64) -> i64;
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
-    unsafe { _vuqaddd_s64(a, b) }
 }
-#[doc = "Signed saturating accumulate of unsigned value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadds_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vuqadds_s32(a: i32, b: u32) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.suqadd.i32"
-        )]
-        fn _vuqadds_s32(a: i32, b: u32) -> i32;
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        )
     }
-    unsafe { _vuqadds_s32(a, b) }
 }
 #[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(uzp2)
 )]
-pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
+pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(uzp2)
 )]
-pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
+pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
 #[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(uzp2)
 )]
-pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(uzp2)
 )]
-pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
 #[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
+pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Exclusive OR and rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vxarq_u64)"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(xar, IMM6 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub fn vxarq_u64<const IMM6: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(IMM6, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.xar"
+        )]
+        fn _vxarq_u64(a: uint64x2_t, b: uint64x2_t, n: i64) -> uint64x2_t;
+    }
+    unsafe { _vxarq_u64(a, b, IMM6 as i64) }
+}
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip1)
 )]
-pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip1)
 )]
-pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip1)
 )]
-pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip1)
 )]
-pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip1)
 )]
-pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
+pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
+pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
-        )
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
+pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
+pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
+pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
+pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
             a,
             b,
-            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
         )
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
+pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
+pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
+pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
+pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
-        )
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
-pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
+pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp1)
-)]
-pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
-}
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"]
-#[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(
-    all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
+pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
+pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
+        )
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
+pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
+pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
-        )
-    }
+pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
+pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
+pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
+pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
+pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
-        )
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
+pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
+pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
+pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
+pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
             a,
             b,
-            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
         )
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
+pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(uzp2)
+    assert_instr(zip1)
 )]
-pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
+pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
-#[doc = "Exclusive OR and rotate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vxarq_u64)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(xar, IMM6 = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub fn vxarq_u64<const IMM6: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.xar"
-        )]
-        fn _vxarq_u64(a: uint64x2_t, b: uint64x2_t, n: i64) -> uint64x2_t;
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
+pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vxarq_u64(a, b, IMM6 as i64) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip1)
 )]
-pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
+pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip1)
 )]
-pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
+pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip1)
 )]
-pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip1)
 )]
-pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
+pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 2]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
+pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
-        )
-    }
+pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,fp16")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+#[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
+pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
+pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
+pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
+pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
-        )
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
+pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
+pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
+        )
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
+pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
+pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
-        )
-    }
+pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
+pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
+pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
-    assert_instr(zip1)
+    assert_instr(zip2)
 )]
-pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    unsafe { simd_shuffle!(a, b, [0, 2]) }
+pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s32)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s32)"]
 #[inline]
-#[target_feature(enable = "neon,fp16")]
-#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
+pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
+pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
+pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
             a,
@@ -24961,158 +31171,205 @@ pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
+pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
+pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
+pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
+pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
-        )
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
+pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
+pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
     all(test, not(target_env = "msvc"), target_endian = "little"),
     assert_instr(zip2)
 )]
-pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_shuffle!(a, b, [1, 3]) }
+pub fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
 #[doc = "Zip vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
@@ -25120,11 +31377,17 @@ pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     assert_instr(zip2)
 )]
 pub fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Zip vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
@@ -25141,8 +31404,37 @@ pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     }
 }
 #[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Zip vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
@@ -25153,8 +31445,27 @@ pub fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
 #[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Zip vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
@@ -25165,8 +31476,27 @@ pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
 #[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Zip vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(
@@ -25176,3 +31506,21 @@ pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 pub fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
+pub fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: poly64x2_t = simd_shuffle!(a, b, [1, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index c5bd5c8917880..5284a3c44af49 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -3154,6 +3154,7 @@ pub fn vaddw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t {
 #[doc = "AES single round encryption."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesdq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(test, assert_instr(aesd))]
@@ -3177,8 +3178,52 @@ pub fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
     unsafe { _vaesdq_u8(data, key) }
 }
 #[doc = "AES single round encryption."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesdq_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(aesd))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.aesd"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesd")]
+        fn _vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t;
+    }
+    unsafe {
+        let data: uint8x16_t = simd_shuffle!(
+            data,
+            data,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let key: uint8x16_t = simd_shuffle!(
+            key,
+            key,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let ret_val: uint8x16_t = _vaesdq_u8(data, key);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "AES single round encryption."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaeseq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(test, assert_instr(aese))]
@@ -3201,9 +3246,53 @@ pub fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
     }
     unsafe { _vaeseq_u8(data, key) }
 }
+#[doc = "AES single round encryption."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaeseq_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(aese))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.aese"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aese")]
+        fn _vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t;
+    }
+    unsafe {
+        let data: uint8x16_t = simd_shuffle!(
+            data,
+            data,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let key: uint8x16_t = simd_shuffle!(
+            key,
+            key,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let ret_val: uint8x16_t = _vaeseq_u8(data, key);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
 #[doc = "AES inverse mix columns."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesimcq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(test, assert_instr(aesimc))]
@@ -3226,9 +3315,48 @@ pub fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t {
     }
     unsafe { _vaesimcq_u8(data) }
 }
+#[doc = "AES inverse mix columns."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesimcq_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(aesimc))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.aesimc"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesimc")]
+        fn _vaesimcq_u8(data: uint8x16_t) -> uint8x16_t;
+    }
+    unsafe {
+        let data: uint8x16_t = simd_shuffle!(
+            data,
+            data,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let ret_val: uint8x16_t = _vaesimcq_u8(data);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
 #[doc = "AES mix columns."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesmcq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(test, assert_instr(aesmc))]
@@ -3251,6 +3379,44 @@ pub fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t {
     }
     unsafe { _vaesmcq_u8(data) }
 }
+#[doc = "AES mix columns."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesmcq_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(aesmc))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.aesmc"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesmc")]
+        fn _vaesmcq_u8(data: uint8x16_t) -> uint8x16_t;
+    }
+    unsafe {
+        let data: uint8x16_t = simd_shuffle!(
+            data,
+            data,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        let ret_val: uint8x16_t = _vaesmcq_u8(data);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
 #[doc = "Vector bitwise and"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s8)"]
 #[inline]
@@ -7594,6 +7760,7 @@ pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
 #[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
@@ -7611,8 +7778,34 @@ pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t {
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7628,8 +7821,32 @@ pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t {
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x4_t = simd_shuffle!(a, b, [0, 1, 2, 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7645,8 +7862,37 @@ pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t {
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t =
+            simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7662,8 +7908,32 @@ pub fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t {
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7679,8 +7949,32 @@ pub fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t {
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x4_t = simd_shuffle!(a, b, [0, 1, 2, 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7696,8 +7990,30 @@ pub fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 1]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t {
+    unsafe {
+        let ret_val: int64x2_t = simd_shuffle!(a, b, [0, 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7713,8 +8029,37 @@ pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t =
+            simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7730,8 +8075,32 @@ pub fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t {
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7747,8 +8116,32 @@ pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t {
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x4_t = simd_shuffle!(a, b, [0, 1, 2, 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7764,8 +8157,30 @@ pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 1]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t {
+    unsafe {
+        let ret_val: uint64x2_t = simd_shuffle!(a, b, [0, 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7781,8 +8196,37 @@ pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t {
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t =
+            simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7798,8 +8242,32 @@ pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 #[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7814,10 +8282,30 @@ pub fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t {
 pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 1]) }
 }
+#[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t {
+    unsafe {
+        let ret_val: poly64x2_t = simd_shuffle!(a, b, [0, 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7839,36 +8327,8 @@ pub fn vcreate_f16(a: u64) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vcreate_f16(a: u64) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7888,34 +8348,8 @@ pub fn vcreate_f32(a: u64) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcreate_f32(a: u64) -> float32x2_t {
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7935,34 +8369,8 @@ pub fn vcreate_s8(a: u64) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcreate_s8(a: u64) -> int8x8_t {
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -7982,34 +8390,8 @@ pub fn vcreate_s16(a: u64) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcreate_s16(a: u64) -> int16x4_t {
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -8029,31 +8411,6 @@ pub fn vcreate_s32(a: u64) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcreate_s32(a: u64) -> int32x2_t {
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -8077,7 +8434,6 @@ pub fn vcreate_s64(a: u64) -> int64x1_t {
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -8097,34 +8453,8 @@ pub fn vcreate_u8(a: u64) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcreate_u8(a: u64) -> uint8x8_t {
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -8144,34 +8474,8 @@ pub fn vcreate_u16(a: u64) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcreate_u16(a: u64) -> uint16x4_t {
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -8191,31 +8495,6 @@ pub fn vcreate_u32(a: u64) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcreate_u32(a: u64) -> uint32x2_t {
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -8239,7 +8518,6 @@ pub fn vcreate_u64(a: u64) -> uint64x1_t {
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -8259,34 +8537,8 @@ pub fn vcreate_p8(a: u64) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcreate_p8(a: u64) -> poly8x8_t {
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -8306,31 +8558,6 @@ pub fn vcreate_p16(a: u64) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcreate_p16(a: u64) -> poly16x4_t {
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p64)"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
@@ -9532,6 +9759,7 @@ pub fn vdotq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x1
 #[doc = "Dot product arithmetic (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))]
@@ -9559,8 +9787,45 @@ pub fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
     unsafe { _vdot_s32(a, b, c) }
 }
 #[doc = "Dot product arithmetic (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sdot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v2i32.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sdot.v2i32.v8i8"
+        )]
+        fn _vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t;
+    }
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x2_t = _vdot_s32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Dot product arithmetic (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))]
@@ -9588,8 +9853,47 @@ pub fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
     unsafe { _vdotq_s32(a, b, c) }
 }
 #[doc = "Dot product arithmetic (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_s32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sdot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v4i32.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sdot.v4i32.v16i8"
+        )]
+        fn _vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
+    }
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: int8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = _vdotq_s32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Dot product arithmetic (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))]
@@ -9617,8 +9921,45 @@ pub fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
     unsafe { _vdot_u32(a, b, c) }
 }
 #[doc = "Dot product arithmetic (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_u32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(udot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v2i32.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.udot.v2i32.v8i8"
+        )]
+        fn _vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t;
+    }
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x2_t = _vdot_u32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Dot product arithmetic (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,dotprod")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))]
@@ -9645,9 +9986,48 @@ pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
     }
     unsafe { _vdotq_u32(a, b, c) }
 }
+#[doc = "Dot product arithmetic (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_u32)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(udot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v4i32.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.udot.v4i32.v16i8"
+        )]
+        fn _vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t;
+    }
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: uint8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vdotq_u32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
@@ -9671,8 +10051,9 @@ pub fn vdup_lane_f16<const N: i32>(a: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
@@ -9691,59 +10072,74 @@ pub fn vdup_lane_f16<const N: i32>(a: float16x4_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vdupq_lane_f16<const N: i32>(a: float16x4_t) -> float16x8_t {
+pub fn vdup_lane_f16<const N: i32>(a: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, a, [N as u32; 4]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_f32<const N: i32>(a: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vdupq_lane_f16<const N: i32>(a: float16x4_t) -> float16x8_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vdupq_lane_f16<const N: i32>(a: float16x4_t) -> float16x8_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
@@ -9760,13 +10156,14 @@ pub fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+pub fn vdup_lane_f32<const N: i32>(a: float32x2_t) -> float32x2_t {
     static_assert_uimm_bits!(N, 1);
     unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
@@ -9783,13 +10180,18 @@ pub fn vdup_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_f32<const N: i32>(a: float32x2_t) -> float32x4_t {
+pub fn vdup_lane_f32<const N: i32>(a: float32x2_t) -> float32x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
@@ -9806,13 +10208,14 @@ pub fn vdupq_lane_f32<const N: i32>(a: float32x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_s32<const N: i32>(a: int32x2_t) -> int32x4_t {
+pub fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
@@ -9829,19 +10232,24 @@ pub fn vdupq_lane_s32<const N: i32>(a: int32x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
+pub fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -9852,19 +10260,20 @@ pub fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
+pub fn vdup_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -9875,19 +10284,24 @@ pub fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
+pub fn vdup_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -9898,19 +10312,20 @@ pub fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
+pub fn vdupq_lane_f32<const N: i32>(a: float32x2_t) -> float32x4_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -9921,19 +10336,24 @@ pub fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+pub fn vdupq_lane_f32<const N: i32>(a: float32x2_t) -> float32x4_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -9944,19 +10364,20 @@ pub fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+pub fn vdupq_lane_s32<const N: i32>(a: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -9967,19 +10388,24 @@ pub fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+pub fn vdupq_lane_s32<const N: i32>(a: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -9990,19 +10416,20 @@ pub fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+pub fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10013,19 +10440,24 @@ pub fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+pub fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10036,19 +10468,20 @@ pub fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+pub fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10059,19 +10492,24 @@ pub fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
+pub fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, a, [N as u32; 4]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10082,19 +10520,20 @@ pub fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
+pub fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10105,19 +10544,24 @@ pub fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
+pub fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, a, [N as u32; 4]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, N = 0)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10128,19 +10572,20 @@ pub fn vdupq_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert!(N == 0);
-    a
+pub fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, N = 0)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10151,66 +10596,73 @@ pub fn vdup_lane_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert!(N == 0);
-    a
+pub fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, a, [N as u32; 4]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vdup_laneq_f16<const N: i32>(a: float16x8_t) -> float16x4_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
+pub fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vdupq_laneq_f16<const N: i32>(a: float16x8_t) -> float16x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+pub fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup, N = 2)
@@ -10224,16 +10676,17 @@ pub fn vdupq_laneq_f16<const N: i32>(a: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_f32<const N: i32>(a: float32x4_t) -> float32x2_t {
+pub fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup, N = 2)
@@ -10247,16 +10700,21 @@ pub fn vdup_laneq_f32<const N: i32>(a: float32x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_s32<const N: i32>(a: int32x4_t) -> int32x2_t {
+pub fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup, N = 2)
@@ -10270,16 +10728,17 @@ pub fn vdup_laneq_s32<const N: i32>(a: int32x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x2_t {
+pub fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup, N = 2)
@@ -10293,19 +10752,24 @@ pub fn vdup_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_f32<const N: i32>(a: float32x4_t) -> float32x4_t {
+pub fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10316,19 +10780,20 @@ pub fn vdupq_laneq_f32<const N: i32>(a: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+pub fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10339,16 +10804,21 @@ pub fn vdupq_laneq_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+pub fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup, N = 4)
@@ -10362,16 +10832,17 @@ pub fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
+pub fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup, N = 4)
@@ -10385,16 +10856,21 @@ pub fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
+pub fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
-}
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"]
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup, N = 4)
@@ -10408,16 +10884,17 @@ pub fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
+pub fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup, N = 4)
@@ -10431,16 +10908,21 @@ pub fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
+pub fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup, N = 4)
@@ -10454,16 +10936,17 @@ pub fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+pub fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup, N = 4)
@@ -10477,19 +10960,28 @@ pub fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+pub fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = simd_shuffle!(a, a, [N as u32; 16]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10500,19 +10992,20 @@ pub fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+pub fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10523,19 +11016,28 @@ pub fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+pub fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = simd_shuffle!(a, a, [N as u32; 16]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10546,19 +11048,20 @@ pub fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
+pub fn vdupq_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10569,19 +11072,27 @@ pub fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
+pub fn vdupq_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = simd_shuffle!(a, a, [N as u32; 16]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(nop, N = 0)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10592,19 +11103,19 @@ pub fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
+pub fn vdup_lane_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N == 0);
+    a
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(nop, N = 0)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -10615,119 +11126,134 @@ pub fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
+pub fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert!(N == 0);
+    a
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, N = 1)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_s64<const N: i32>(a: int64x2_t) -> int64x1_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { transmute(vgetq_lane_s64::<N>(a)) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vdup_laneq_f16<const N: i32>(a: float16x8_t) -> float16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, N = 1)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { transmute(vgetq_lane_u64::<N>(a)) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vdup_laneq_f16<const N: i32>(a: float16x8_t) -> float16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, a, [N as u32; 4]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Create a new vector with all lanes set to a value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vdup_n_f16(a: f16) -> float16x4_t {
-    float16x4_t::splat(a)
-}
-#[doc = "Create a new vector with all lanes set to a value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vdupq_n_f16(a: f16) -> float16x8_t {
-    float16x8_t::splat(a)
+pub fn vdupq_laneq_f16<const N: i32>(a: float16x8_t) -> float16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_f32(value: f32) -> float32x2_t {
-    float32x2_t::splat(value)
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vdupq_laneq_f16<const N: i32>(a: float16x8_t) -> float16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10736,19 +11262,22 @@ pub fn vdup_n_f32(value: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_p16(value: p16) -> poly16x4_t {
-    poly16x4_t::splat(value)
+pub fn vdup_laneq_f32<const N: i32>(a: float32x4_t) -> float32x2_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10757,19 +11286,26 @@ pub fn vdup_n_p16(value: p16) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_p8(value: p8) -> poly8x8_t {
-    poly8x8_t::splat(value)
+pub fn vdup_laneq_f32<const N: i32>(a: float32x4_t) -> float32x2_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10778,19 +11314,22 @@ pub fn vdup_n_p8(value: p8) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_s16(value: i16) -> int16x4_t {
-    int16x4_t::splat(value)
+pub fn vdup_laneq_s32<const N: i32>(a: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10799,19 +11338,26 @@ pub fn vdup_n_s16(value: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_s32(value: i32) -> int32x2_t {
-    int32x2_t::splat(value)
+pub fn vdup_laneq_s32<const N: i32>(a: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmov)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10820,19 +11366,22 @@ pub fn vdup_n_s32(value: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_s64(value: i64) -> int64x1_t {
-    int64x1_t::splat(value)
+pub fn vdup_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10841,19 +11390,26 @@ pub fn vdup_n_s64(value: i64) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_s8(value: i8) -> int8x8_t {
-    int8x8_t::splat(value)
+pub fn vdup_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10862,19 +11418,22 @@ pub fn vdup_n_s8(value: i8) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_u16(value: u16) -> uint16x4_t {
-    uint16x4_t::splat(value)
+pub fn vdupq_laneq_f32<const N: i32>(a: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10883,19 +11442,26 @@ pub fn vdup_n_u16(value: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_u32(value: u32) -> uint32x2_t {
-    uint32x2_t::splat(value)
+pub fn vdupq_laneq_f32<const N: i32>(a: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmov)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10904,19 +11470,22 @@ pub fn vdup_n_u32(value: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_u64(value: u64) -> uint64x1_t {
-    uint64x1_t::splat(value)
+pub fn vdupq_laneq_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10925,19 +11494,26 @@ pub fn vdup_n_u64(value: u64) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdup_n_u8(value: u8) -> uint8x8_t {
-    uint8x8_t::splat(value)
+pub fn vdupq_laneq_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10946,19 +11522,22 @@ pub fn vdup_n_u8(value: u8) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_f32(value: f32) -> float32x4_t {
-    float32x4_t::splat(value)
+pub fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10967,19 +11546,26 @@ pub fn vdupq_n_f32(value: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_p16(value: p16) -> poly16x8_t {
-    poly16x8_t::splat(value)
+pub fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -10988,19 +11574,22 @@ pub fn vdupq_n_p16(value: p16) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_p8(value: p8) -> poly8x16_t {
-    poly8x16_t::splat(value)
+pub fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11009,19 +11598,26 @@ pub fn vdupq_n_p8(value: p8) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_s16(value: i16) -> int16x8_t {
-    int16x8_t::splat(value)
+pub fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, a, [N as u32; 4]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11030,19 +11626,22 @@ pub fn vdupq_n_s16(value: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_s32(value: i32) -> int32x4_t {
-    int32x4_t::splat(value)
+pub fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11051,19 +11650,26 @@ pub fn vdupq_n_s32(value: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_s64(value: i64) -> int64x2_t {
-    int64x2_t::splat(value)
+pub fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, a, [N as u32; 4]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11072,19 +11678,22 @@ pub fn vdupq_n_s64(value: i64) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_s8(value: i8) -> int8x16_t {
-    int8x16_t::splat(value)
+pub fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11093,19 +11702,26 @@ pub fn vdupq_n_s8(value: i8) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_u16(value: u16) -> uint16x8_t {
-    uint16x8_t::splat(value)
+pub fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, a, [N as u32; 4]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11114,19 +11730,22 @@ pub fn vdupq_n_u16(value: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_u32(value: u32) -> uint32x4_t {
-    uint32x4_t::splat(value)
+pub fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11135,19 +11754,26 @@ pub fn vdupq_n_u32(value: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_u64(value: u64) -> uint64x2_t {
-    uint64x2_t::splat(value)
+pub fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11156,19 +11782,22 @@ pub fn vdupq_n_u64(value: u64) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_n_u8(value: u8) -> uint8x16_t {
-    uint8x16_t::splat(value)
+pub fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32_vfp4)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11177,19 +11806,26 @@ pub fn vdupq_n_u8(value: u8) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-fn vdup_n_f32_vfp4(value: f32) -> float32x2_t {
-    float32x2_t::splat(value)
+pub fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32_vfp4)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11198,18 +11834,20 @@ fn vdup_n_f32_vfp4(value: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t {
-    float32x4_t::splat(value)
+pub fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 0)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -11220,19 +11858,24 @@ fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_s64<const N: i32>(a: int64x1_t) -> int64x2_t {
-    static_assert!(N == 0);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
+pub fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 0)
+    assert_instr(dup, N = 8)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -11243,19 +11886,20 @@ pub fn vdupq_lane_s64<const N: i32>(a: int64x1_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x2_t {
-    static_assert!(N == 0);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
+pub fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(dup, N = 8)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -11266,19 +11910,25 @@ pub fn vdupq_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
+pub fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(dup, N = 8)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -11289,20 +11939,22 @@ pub fn vdupq_laneq_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vdupq_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
+pub fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11311,19 +11963,27 @@ pub fn vdupq_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11332,19 +11992,22 @@ pub fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11353,19 +12016,27 @@ pub fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, a, [N as u32; 8]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11374,19 +12045,22 @@ pub fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11395,19 +12069,31 @@ pub fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = simd_shuffle!(a, a, [N as u32; 16]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11416,19 +12102,22 @@ pub fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11437,19 +12126,31 @@ pub fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = simd_shuffle!(a, a, [N as u32; 16]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11458,19 +12159,22 @@ pub fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11479,19 +12183,30 @@ pub fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = simd_shuffle!(a, a, [N as u32; 16]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(nop, N = 1)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11500,19 +12215,21 @@ pub fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdup_laneq_s64<const N: i32>(a: int64x2_t) -> int64x1_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { transmute(vgetq_lane_s64::<N>(a)) }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(nop, N = 1)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11521,39 +12238,51 @@ pub fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdup_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { transmute(vgetq_lane_u64::<N>(a)) }
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16)"]
+#[doc = "Create a new vector with all lanes set to a value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    assert_instr(dup)
 )]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vdup_n_f16(a: f16) -> float16x4_t {
+    float16x4_t::splat(a)
+}
+#[doc = "Create a new vector with all lanes set to a value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
 )]
-pub fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_xor(a, b) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vdupq_n_f16(a: f16) -> float16x8_t {
+    float16x8_t::splat(a)
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11563,18 +12292,18 @@ pub fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdup_n_f32(value: f32) -> float32x2_t {
+    float32x2_t::splat(value)
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11584,18 +12313,18 @@ pub fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdup_n_p16(value: p16) -> poly16x4_t {
+    poly16x4_t::splat(value)
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11605,18 +12334,18 @@ pub fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdup_n_p8(value: p8) -> poly8x8_t {
+    poly8x8_t::splat(value)
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11626,44 +12355,40 @@ pub fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_xor(a, b) }
+pub fn vdup_n_s16(value: i16) -> int16x4_t {
+    int16x4_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s32)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vext_f16<const N: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
+pub fn vdup_n_s32(value: i32) -> int32x2_t {
+    int32x2_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 1)
+    assert_instr(fmov)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11672,21 +12397,19 @@ pub fn vext_f16<const N: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
+pub fn vdup_n_s64(value: i64) -> int64x1_t {
+    int64x1_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 1)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11695,21 +12418,19 @@ pub fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
+pub fn vdup_n_s8(value: i8) -> int8x8_t {
+    int8x8_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 1)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11718,23 +12439,19 @@ pub fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vext_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
+pub fn vdup_n_u16(value: u16) -> uint16x4_t {
+    uint16x4_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, N = 0)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11743,23 +12460,19 @@ pub fn vext_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_s64<const N: i32>(a: int64x1_t, _b: int64x1_t) -> int64x1_t {
-    static_assert!(N == 0);
-    a
+pub fn vdup_n_u32(value: u32) -> uint32x2_t {
+    uint32x2_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, N = 0)
+    assert_instr(fmov)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11768,21 +12481,19 @@ pub unsafe fn vext_s64<const N: i32>(a: int64x1_t, _b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_u64<const N: i32>(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N == 0);
-    a
+pub fn vdup_n_u64(value: u64) -> uint64x1_t {
+    uint64x1_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11791,36 +12502,19 @@ pub unsafe fn vext_u64<const N: i32>(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [
-                N as u32,
-                N as u32 + 1,
-                N as u32 + 2,
-                N as u32 + 3,
-                N as u32 + 4,
-                N as u32 + 5,
-                N as u32 + 6,
-                N as u32 + 7
-            ]
-        )
-    }
+pub fn vdup_n_u8(value: u8) -> uint8x8_t {
+    uint8x8_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11829,36 +12523,19 @@ pub fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [
-                N as u32,
-                N as u32 + 1,
-                N as u32 + 2,
-                N as u32 + 3,
-                N as u32 + 4,
-                N as u32 + 5,
-                N as u32 + 6,
-                N as u32 + 7
-            ]
-        )
-    }
+pub fn vdupq_n_f32(value: f32) -> float32x4_t {
+    float32x4_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11867,36 +12544,19 @@ pub fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [
-                N as u32,
-                N as u32 + 1,
-                N as u32 + 2,
-                N as u32 + 3,
-                N as u32 + 4,
-                N as u32 + 5,
-                N as u32 + 6,
-                N as u32 + 7
-            ]
-        )
-    }
+pub fn vdupq_n_p16(value: p16) -> poly16x8_t {
+    poly16x8_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11905,36 +12565,19 @@ pub fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [
-                N as u32,
-                N as u32 + 1,
-                N as u32 + 2,
-                N as u32 + 3,
-                N as u32 + 4,
-                N as u32 + 5,
-                N as u32 + 6,
-                N as u32 + 7
-            ]
-        )
-    }
+pub fn vdupq_n_p8(value: p8) -> poly8x16_t {
+    poly8x16_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11943,36 +12586,19 @@ pub fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [
-                N as u32,
-                N as u32 + 1,
-                N as u32 + 2,
-                N as u32 + 3,
-                N as u32 + 4,
-                N as u32 + 5,
-                N as u32 + 6,
-                N as u32 + 7
-            ]
-        )
-    }
+pub fn vdupq_n_s16(value: i16) -> int16x8_t {
+    int16x8_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11981,75 +12607,40 @@ pub fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [
-                N as u32,
-                N as u32 + 1,
-                N as u32 + 2,
-                N as u32 + 3,
-                N as u32 + 4,
-                N as u32 + 5,
-                N as u32 + 6,
-                N as u32 + 7
-            ]
-        )
-    }
+pub fn vdupq_n_s32(value: i32) -> int32x4_t {
+    int32x4_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s64)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vextq_f16<const N: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [
-                N as u32,
-                N as u32 + 1,
-                N as u32 + 2,
-                N as u32 + 3,
-                N as u32 + 4,
-                N as u32 + 5,
-                N as u32 + 6,
-                N as u32 + 7
-            ]
-        )
-    }
+pub fn vdupq_n_s64(value: i64) -> int64x2_t {
+    int64x2_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12058,21 +12649,19 @@ pub fn vextq_f16<const N: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
+pub fn vdupq_n_s8(value: i8) -> int8x16_t {
+    int8x16_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12081,21 +12670,19 @@ pub fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
+pub fn vdupq_n_u16(value: u16) -> uint16x8_t {
+    uint16x8_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12104,21 +12691,19 @@ pub fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
+pub fn vdupq_n_u32(value: u32) -> uint32x4_t {
+    uint32x4_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12127,21 +12712,19 @@ pub fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
+pub fn vdupq_n_u64(value: u64) -> uint64x2_t {
+    uint64x2_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12150,21 +12733,19 @@ pub fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
+pub fn vdupq_n_u8(value: u8) -> uint8x16_t {
+    uint8x16_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f32_vfp4)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12173,21 +12754,19 @@ pub fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
+fn vdup_n_f32_vfp4(value: f32) -> float32x2_t {
+    float32x2_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f32_vfp4)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 1)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12196,21 +12775,21 @@ pub fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
+fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t {
+    float32x4_t::splat(value)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 1)
+    assert_instr(dup, N = 0)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12219,21 +12798,22 @@ pub fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
+pub fn vdupq_lane_s64<const N: i32>(a: int64x1_t) -> int64x2_t {
+    static_assert!(N == 0);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 15)
+    assert_instr(dup, N = 0)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12242,44 +12822,25 @@ pub fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 4);
+pub fn vdupq_lane_s64<const N: i32>(a: int64x1_t) -> int64x2_t {
+    static_assert!(N == 0);
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [
-                N as u32,
-                N as u32 + 1,
-                N as u32 + 2,
-                N as u32 + 3,
-                N as u32 + 4,
-                N as u32 + 5,
-                N as u32 + 6,
-                N as u32 + 7,
-                N as u32 + 8,
-                N as u32 + 9,
-                N as u32 + 10,
-                N as u32 + 11,
-                N as u32 + 12,
-                N as u32 + 13,
-                N as u32 + 14,
-                N as u32 + 15
-            ]
-        )
+        let ret_val: int64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 15)
+    assert_instr(dup, N = 0)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12288,44 +12849,22 @@ pub fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [
-                N as u32,
-                N as u32 + 1,
-                N as u32 + 2,
-                N as u32 + 3,
-                N as u32 + 4,
-                N as u32 + 5,
-                N as u32 + 6,
-                N as u32 + 7,
-                N as u32 + 8,
-                N as u32 + 9,
-                N as u32 + 10,
-                N as u32 + 11,
-                N as u32 + 12,
-                N as u32 + 13,
-                N as u32 + 14,
-                N as u32 + 15
-            ]
-        )
-    }
+pub fn vdupq_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x2_t {
+    static_assert!(N == 0);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 15)
+    assert_instr(dup, N = 0)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12334,87 +12873,77 @@ pub fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(N, 4);
+pub fn vdupq_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x2_t {
+    static_assert!(N == 0);
     unsafe {
-        simd_shuffle!(
-            a,
-            b,
-            [
-                N as u32,
-                N as u32 + 1,
-                N as u32 + 2,
-                N as u32 + 3,
-                N as u32 + 4,
-                N as u32 + 5,
-                N as u32 + 6,
-                N as u32 + 7,
-                N as u32 + 8,
-                N as u32 + 9,
-                N as u32 + 10,
-                N as u32 + 11,
-                N as u32 + 12,
-                N as u32 + 13,
-                N as u32 + 14,
-                N as u32 + 15
-            ]
-        )
+        let ret_val: uint64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmla)
+    assert_instr(dup, N = 1)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
-    unsafe { simd_fma(b, c, a) }
+pub fn vdupq_laneq_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmla)
+    assert_instr(dup, N = 1)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
-    unsafe { simd_fma(b, c, a) }
-}
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"]
-#[inline]
+pub fn vdupq_laneq_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"]
+#[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmla)
+    assert_instr(dup, N = 1)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12423,19 +12952,22 @@ pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    unsafe { simd_fma(b, c, a) }
+pub fn vdupq_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, a, [N as u32, N as u32]) }
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmla)
+    assert_instr(dup, N = 1)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12444,18 +12976,23 @@ pub fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    unsafe { simd_fma(b, c, a) }
+pub fn vdupq_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint64x2_t = simd_shuffle!(a, a, [N as u32, N as u32]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmla)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12465,18 +13002,18 @@ pub fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
-    vfma_f32(a, b, vdup_n_f32_vfp4(c))
+pub fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmla)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12486,70 +13023,60 @@ pub fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
-    vfmaq_f32(a, b, vdupq_n_f32_vfp4(c))
+pub fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Floating-point fused multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmls)
+    assert_instr(eor)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
-    unsafe {
-        let b: float16x4_t = simd_neg(b);
-        vfma_f16(a, b, c)
-    }
+pub fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Floating-point fused multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmls)
+    assert_instr(eor)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
-    unsafe {
-        let b: float16x8_t = simd_neg(b);
-        vfmaq_f16(a, b, c)
-    }
+pub fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Floating-point fused multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmls)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12559,21 +13086,18 @@ pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    unsafe {
-        let b: float32x2_t = simd_neg(b);
-        vfma_f32(a, b, c)
-    }
+pub fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Floating-point fused multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmls)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12583,21 +13107,18 @@ pub fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    unsafe {
-        let b: float32x4_t = simd_neg(b);
-        vfmaq_f32(a, b, c)
-    }
+pub fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmls)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12607,18 +13128,18 @@ pub fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
-    vfms_f32(a, b, vdup_n_f32_vfp4(c))
+pub fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmls)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12628,56 +13149,60 @@ pub fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
-    vfmsq_f32(a, b, vdupq_n_f32_vfp4(c))
+pub fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Duplicate vector element to vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(eor)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vget_high_f16(a: float16x8_t) -> float16x4_t {
-    unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) }
+pub fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Duplicate vector element to vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(eor)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vget_low_f16(a: float16x8_t) -> float16x4_t {
-    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
+pub fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12687,18 +13212,18 @@ pub fn vget_low_f16(a: float16x8_t) -> float16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_f32(a: float32x4_t) -> float32x2_t {
-    unsafe { simd_shuffle!(a, a, [2, 3]) }
+pub fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12708,18 +13233,18 @@ pub fn vget_high_f32(a: float32x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
-    unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) }
+pub fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p8)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12729,18 +13254,18 @@ pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) }
+pub fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12750,18 +13275,18 @@ pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_s16(a: int16x8_t) -> int16x4_t {
-    unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) }
+pub fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12771,18 +13296,18 @@ pub fn vget_high_s16(a: int16x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_s32(a: int32x4_t) -> int32x2_t {
-    unsafe { simd_shuffle!(a, a, [2, 3]) }
+pub fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s8)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(eor)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12792,61 +13317,77 @@ pub fn vget_high_s32(a: int32x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_s8(a: int8x16_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) }
+pub fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(ext, N = 3)
 )]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t {
-    unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vext_f16<const N: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(ext, N = 3)
 )]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t {
-    unsafe { simd_shuffle!(a, a, [2, 3]) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vext_f16<const N: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t =
+            simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(ext, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12855,19 +13396,22 @@ pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) }
+pub fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(ext, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12876,19 +13420,27 @@ pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
-    unsafe { transmute(u64x1::new(simd_extract!(a, 1))) }
+pub fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(ext, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12897,19 +13449,22 @@ pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_s64(a: int64x2_t) -> int64x1_t {
-    unsafe { int64x1_t([simd_extract!(a, 1)]) }
+pub fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(ext, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12918,52 +13473,51 @@ pub fn vget_high_s64(a: int64x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t {
-    unsafe { uint64x1_t([simd_extract!(a, 1)]) }
+pub fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Duplicate vector element to scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ext, N = 1)
 )]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vget_lane_f16<const LANE: i32>(a: float16x4_t) -> f16 {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_extract!(a, LANE as u32) }
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vext_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
-#[doc = "Duplicate vector element to scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ext, N = 1)
 )]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vgetq_lane_f16<const LANE: i32>(a: float16x8_t) -> f16 {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_extract!(a, LANE as u32) }
-}
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12972,17 +13526,28 @@ pub fn vgetq_lane_f16<const LANE: i32>(a: float16x8_t) -> f16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
-    static_assert_uimm_bits!(IMM5, 1);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vext_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, N = 0)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -12991,17 +13556,23 @@ pub fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
-    static_assert_uimm_bits!(IMM5, 2);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub unsafe fn vext_s64<const N: i32>(a: int64x1_t, _b: int64x1_t) -> int64x1_t {
+    static_assert!(N == 0);
+    a
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, N = 0)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13010,17 +13581,22 @@ pub fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
-    static_assert_uimm_bits!(IMM5, 3);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub unsafe fn vext_u64<const N: i32>(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N == 0);
+    a
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13029,17 +13605,37 @@ pub fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
-    static_assert_uimm_bits!(IMM5, 2);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13048,17 +13644,40 @@ pub fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
-    static_assert_uimm_bits!(IMM5, 1);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        );
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13067,17 +13686,37 @@ pub fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
-    static_assert_uimm_bits!(IMM5, 3);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13086,17 +13725,40 @@ pub fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
-    static_assert_uimm_bits!(IMM5, 2);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        );
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13105,17 +13767,37 @@ pub fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
-    static_assert_uimm_bits!(IMM5, 1);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13124,17 +13806,40 @@ pub fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
-    static_assert_uimm_bits!(IMM5, 3);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        );
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13143,17 +13848,37 @@ pub fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
-    static_assert_uimm_bits!(IMM5, 2);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13162,17 +13887,40 @@ pub fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
-    static_assert_uimm_bits!(IMM5, 3);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        );
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13181,17 +13929,37 @@ pub fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
-    static_assert_uimm_bits!(IMM5, 1);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13200,17 +13968,40 @@ pub fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
-    static_assert_uimm_bits!(IMM5, 4);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        );
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13219,17 +14010,37 @@ pub fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
-    static_assert_uimm_bits!(IMM5, 3);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13238,55 +14049,123 @@ pub fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
-    static_assert_uimm_bits!(IMM5, 2);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        );
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
-    static_assert_uimm_bits!(IMM5, 1);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vextq_f16<const N: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 7)
+)]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
-    static_assert_uimm_bits!(IMM5, 4);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vextq_f16<const N: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        );
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13295,17 +14174,22 @@ pub fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
-    static_assert_uimm_bits!(IMM5, 3);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13314,17 +14198,28 @@ pub fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
-    static_assert_uimm_bits!(IMM5, 2);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t =
+            simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13333,17 +14228,22 @@ pub fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
-    static_assert_uimm_bits!(IMM5, 2);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13352,17 +14252,28 @@ pub fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
-    static_assert_uimm_bits!(IMM5, 4);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t =
+            simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13371,17 +14282,22 @@ pub fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
-    static_assert!(IMM5 == 0);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13390,17 +14306,28 @@ pub fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
-    static_assert!(IMM5 == 0);
-    unsafe { simd_extract!(v, IMM5 as u32) }
+pub fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int32x4_t =
+            simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Move vector element to general-purpose register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13409,16 +14336,22 @@ pub fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
-    static_assert!(IMM5 == 0);
-    unsafe { simd_extract!(v, 0) }
+pub fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13427,15 +14360,28 @@ pub fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_f32(a: float32x4_t) -> float32x2_t {
-    unsafe { simd_shuffle!(a, a, [0, 1]) }
+pub fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t =
+            simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13444,15 +14390,22 @@ pub fn vget_low_f32(a: float32x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
-    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
+pub fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13461,15 +14414,28 @@ pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
+pub fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t =
+            simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13478,15 +14444,22 @@ pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_s16(a: int16x8_t) -> int16x4_t {
-    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
+pub fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 3)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13495,15 +14468,28 @@ pub fn vget_low_s16(a: int16x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_s32(a: int32x4_t) -> int32x2_t {
-    unsafe { simd_shuffle!(a, a, [0, 1]) }
+pub fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t =
+            simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 1)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13512,15 +14498,22 @@ pub fn vget_low_s32(a: int32x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_s8(a: int8x16_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
+pub fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 1)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13529,15 +14522,27 @@ pub fn vget_low_s8(a: int8x16_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
-    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
+pub fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 1)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13546,15 +14551,22 @@ pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
-    unsafe { simd_shuffle!(a, a, [0, 1]) }
+pub fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 1)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13563,15 +14575,27 @@ pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
+pub fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint64x2_t = simd_shuffle!(a, b, [N as u32, N as u32 + 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 15)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13580,15 +14604,45 @@ pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
-    unsafe { transmute(u64x1::new(simd_extract!(a, 0))) }
+pub fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        )
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 15)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13597,15 +14651,54 @@ pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_s64(a: int64x2_t) -> int64x1_t {
-    unsafe { int64x1_t([simd_extract!(a, 0)]) }
+pub fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u64)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext, N = 15)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13614,19 +14707,45 @@ pub fn vget_low_s64(a: int64x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
-    unsafe { uint64x1_t([simd_extract!(a, 0)]) }
+pub fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        )
+    }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(ext, N = 15)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13635,27 +14754,54 @@ pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")]
-        fn _vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+pub fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
-    unsafe { _vhadd_s8(a, b) }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(ext, N = 15)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13664,27 +14810,45 @@ pub fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")]
-        fn _vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+pub fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        )
     }
-    unsafe { _vhaddq_s8(a, b) }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(ext, N = 15)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -13693,84 +14857,95 @@ pub fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")]
-        fn _vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+pub fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        );
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
-    unsafe { _vhadd_s16(a, b) }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)"]
+#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(fmla)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")]
-        fn _vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vhaddq_s16(a, b) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
+    unsafe { simd_fma(b, c, a) }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)"]
+#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(fmla)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")]
-        fn _vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vhadd_s32(a, b) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
+    unsafe { simd_fma(b, c, a) }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)"]
+#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(fmla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13780,26 +14955,18 @@ pub fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")]
-        fn _vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vhaddq_s32(a, b) }
+pub fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe { simd_fma(b, c, a) }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)"]
+#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(fmla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13809,26 +14976,18 @@ pub fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")]
-        fn _vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vhadd_u8(a, b) }
+pub fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe { simd_fma(b, c, a) }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)"]
+#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(fmla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13838,26 +14997,18 @@ pub fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")]
-        fn _vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vhaddq_u8(a, b) }
+pub fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vfma_f32(a, b, vdup_n_f32_vfp4(c))
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)"]
+#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(fmla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13867,84 +15018,70 @@ pub fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")]
-        fn _vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vhadd_u16(a, b) }
+pub fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vfmaq_f32(a, b, vdupq_n_f32_vfp4(c))
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)"]
+#[doc = "Floating-point fused multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(fmls)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")]
-        fn _vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
+    unsafe {
+        let b: float16x4_t = simd_neg(b);
+        vfma_f16(a, b, c)
     }
-    unsafe { _vhaddq_u16(a, b) }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)"]
+#[doc = "Floating-point fused multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(fmls)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")]
-        fn _vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
+    unsafe {
+        let b: float16x8_t = simd_neg(b);
+        vfmaq_f16(a, b, c)
     }
-    unsafe { _vhadd_u32(a, b) }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)"]
+#[doc = "Floating-point fused multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(fmls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13954,26 +15091,21 @@ pub fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")]
-        fn _vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+pub fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe {
+        let b: float32x2_t = simd_neg(b);
+        vfma_f32(a, b, c)
     }
-    unsafe { _vhaddq_u32(a, b) }
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16)"]
+#[doc = "Floating-point fused multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
+    assert_instr(fmls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13983,26 +15115,21 @@ pub fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")]
-        fn _vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+pub fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe {
+        let b: float32x4_t = simd_neg(b);
+        vfmaq_f32(a, b, c)
     }
-    unsafe { _vhsub_s16(a, b) }
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16)"]
+#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
+    assert_instr(fmls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14012,26 +15139,18 @@ pub fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")]
-        fn _vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vhsubq_s16(a, b) }
+pub fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vfms_f32(a, b, vdup_n_f32_vfp4(c))
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32)"]
+#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
+    assert_instr(fmls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14041,142 +15160,107 @@ pub fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")]
-        fn _vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vhsub_s32(a, b) }
+pub fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vfmsq_f32(a, b, vdupq_n_f32_vfp4(c))
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32)"]
+#[doc = "Duplicate vector element to vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
-)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")]
-        fn _vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vhsubq_s32(a, b) }
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vget_high_f16(a: float16x8_t) -> float16x4_t {
+    unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) }
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8)"]
+#[doc = "Duplicate vector element to vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
-)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")]
-        fn _vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vget_high_f16(a: float16x8_t) -> float16x4_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vhsub_s8(a, b) }
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8)"]
+#[doc = "Duplicate vector element to vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
-)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")]
-        fn _vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    unsafe { _vhsubq_s8(a, b) }
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vget_low_f16(a: float16x8_t) -> float16x4_t {
+    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8)"]
+#[doc = "Duplicate vector element to vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
-)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")]
-        fn _vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vget_low_f16(a: float16x8_t) -> float16x4_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vhsub_u8(a, b) }
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14186,26 +15270,19 @@ pub fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")]
-        fn _vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vhsubq_u8(a, b) }
+pub fn vget_high_f32(a: float32x4_t) -> float32x2_t {
+    unsafe { simd_shuffle!(a, a, [2, 3]) }
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14215,26 +15292,23 @@ pub fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")]
-        fn _vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+pub fn vget_high_f32(a: float32x4_t) -> float32x2_t {
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, a, [2, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
-    unsafe { _vhsub_u16(a, b) }
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14244,26 +15318,19 @@ pub fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")]
-        fn _vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
-    }
-    unsafe { _vhsubq_u16(a, b) }
+pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
+    unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) }
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14273,26 +15340,23 @@ pub fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")]
-        fn _vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+pub fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vhsub_u32(a, b) }
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14302,66 +15366,46 @@ pub fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")]
-        fn _vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vhsubq_u32(a, b) }
+pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) }
 }
-#[doc = "Load one single-element structure and replicate to all lanes of one register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
-    let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0)));
-    simd_shuffle!(x, x, [0, 0, 0, 0])
-}
-#[doc = "Load one single-element structure and replicate to all lanes of one register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t {
-    let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0)));
-    simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vget_high_p8(a: poly8x16_t) -> poly8x8_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14371,20 +15415,19 @@ pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t {
-    transmute(f32x2::splat(*ptr))
+pub fn vget_high_s16(a: int16x8_t) -> int16x4_t {
+    unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14394,20 +15437,23 @@ pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t {
-    transmute(u16x4::splat(*ptr))
+pub fn vget_high_s16(a: int16x8_t) -> int16x4_t {
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14417,20 +15463,19 @@ pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t {
-    transmute(u8x8::splat(*ptr))
+pub fn vget_high_s32(a: int32x4_t) -> int32x2_t {
+    unsafe { simd_shuffle!(a, a, [2, 3]) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14440,20 +15485,23 @@ pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t {
-    transmute(i16x4::splat(*ptr))
+pub fn vget_high_s32(a: int32x4_t) -> int32x2_t {
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14463,20 +15511,19 @@ pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t {
-    transmute(i32x2::splat(*ptr))
+pub fn vget_high_s8(a: int8x16_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14486,20 +15533,24 @@ pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t {
-    transmute(i8x8::splat(*ptr))
+pub fn vget_high_s8(a: int8x16_t) -> int8x8_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14509,20 +15560,19 @@ pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t {
-    transmute(u16x4::splat(*ptr))
+pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t {
+    unsafe { simd_shuffle!(a, a, [4, 5, 6, 7]) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14532,20 +15582,23 @@ pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t {
-    transmute(u32x2::splat(*ptr))
+pub fn vget_high_u16(a: uint16x8_t) -> uint16x4_t {
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14555,20 +15608,19 @@ pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t {
-    transmute(u8x8::splat(*ptr))
+pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t {
+    unsafe { simd_shuffle!(a, a, [2, 3]) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14578,20 +15630,23 @@ pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t {
-    transmute(f32x4::splat(*ptr))
+pub fn vget_high_u32(a: uint32x4_t) -> uint32x2_t {
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14601,20 +15656,19 @@ pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t {
-    transmute(u16x8::splat(*ptr))
+pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14624,20 +15678,24 @@ pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t {
-    transmute(u8x16::splat(*ptr))
+pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14647,20 +15705,19 @@ pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t {
-    transmute(i16x8::splat(*ptr))
+pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
+    unsafe { transmute(u64x1::new(simd_extract!(a, 1))) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14670,20 +15727,22 @@ pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t {
-    transmute(i32x4::splat(*ptr))
+pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
+    unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(u64x1::new(simd_extract!(a, 1)))
+    }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14693,20 +15752,19 @@ pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t {
-    transmute(i64x2::splat(*ptr))
+pub fn vget_high_s64(a: int64x2_t) -> int64x1_t {
+    unsafe { int64x1_t([simd_extract!(a, 1)]) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14716,20 +15774,22 @@ pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t {
-    transmute(i8x16::splat(*ptr))
+pub fn vget_high_s64(a: int64x2_t) -> int64x1_t {
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        int64x1_t([simd_extract!(a, 1)])
+    }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14739,20 +15799,19 @@ pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t {
-    transmute(u16x8::splat(*ptr))
+pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t {
+    unsafe { uint64x1_t([simd_extract!(a, 1)]) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14762,44 +15821,102 @@ pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t {
-    transmute(u32x4::splat(*ptr))
+pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t {
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        uint64x1_t([simd_extract!(a, 1)])
+    }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vget_lane_f16<const LANE: i32>(a: float16x4_t) -> f16 {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_extract!(a, LANE as u32) }
+}
+#[doc = "Duplicate vector element to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vget_lane_f16<const LANE: i32>(a: float16x4_t) -> f16 {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        simd_extract!(a, LANE as u32)
+    }
+}
+#[doc = "Duplicate vector element to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
 )]
-pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t {
-    transmute(u64x2::splat(*ptr))
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vgetq_lane_f16<const LANE: i32>(a: float16x8_t) -> f16 {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_extract!(a, LANE as u32) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vgetq_lane_f16<const LANE: i32>(a: float16x8_t) -> f16 {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_extract!(a, LANE as u32)
+    }
+}
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -14808,21 +15925,18 @@ pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t {
-    transmute(u8x16::splat(*ptr))
+pub fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ldr)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -14831,30 +15945,21 @@ pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t {
-    let x: poly64x1_t;
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        x = crate::core_arch::aarch64::vld1_p64(ptr);
+pub fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe {
+        let v: float32x2_t = simd_shuffle!(v, v, [1, 0]);
+        simd_extract!(v, IMM5 as u32)
     }
-    #[cfg(target_arch = "arm")]
-    {
-        x = crate::core_arch::arm::vld1_p64(ptr);
-    };
-    x
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ldr)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -14863,30 +15968,18 @@ pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t {
-    let x: int64x1_t;
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        x = crate::core_arch::aarch64::vld1_s64(ptr);
-    }
-    #[cfg(target_arch = "arm")]
-    {
-        x = crate::core_arch::arm::vld1_s64(ptr);
-    };
-    x
+pub fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ldr)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -14895,367 +15988,84 @@ pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t {
-    let x: uint64x1_t;
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        x = crate::core_arch::aarch64::vld1_u64(ptr);
+pub fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe {
+        let v: poly16x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
     }
-    #[cfg(target_arch = "arm")]
-    {
-        x = crate::core_arch::arm::vld1_u64(ptr);
-    };
-    x
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
-    transmute(vld1_v4f16(
-        ptr as *const i8,
-        crate::mem::align_of::<f16>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
-    transmute(vld1q_v8f16(
-        ptr as *const i8,
-        crate::mem::align_of::<f16>() as i32,
-    ))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t {
-    crate::ptr::read_unaligned(a.cast())
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t {
-    crate::ptr::read_unaligned(a.cast())
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe {
+        let v: poly8x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t {
-    crate::ptr::read_unaligned(a.cast())
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t {
-    crate::ptr::read_unaligned(a.cast())
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
-    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
-    transmute(vld1_v2f32::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
-pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
-    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
-    transmute(vld1q_v4f32::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
-pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
-    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
-    transmute(vld1_v8i8::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
-pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
-    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
-    transmute(vld1q_v16i8::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
-    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
-    transmute(vld1_v4i16::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
-    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
-    transmute(vld1q_v8i16::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
-    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
-    transmute(vld1_v2i32::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
-pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
-    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
-    transmute(vld1q_v4i32::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
-    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
-    transmute(vld1_v1i64::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
-pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
-    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
-    transmute(vld1q_v2i64::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
-pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
-    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
-    transmute(vld1_v8i8::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
-pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
-    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
-    transmute(vld1q_v16i8::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
-    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
-    transmute(vld1_v4i16::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
-    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
-    transmute(vld1q_v8i16::<ALIGN>(ptr as *const i8))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,aes")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
-pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
-    const ALIGN: i32 = crate::mem::align_of::<p64>() as i32;
-    transmute(vld1q_v2i64::<ALIGN>(ptr as *const i8))
+pub fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15264,21 +16074,21 @@ pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe {
+        let v: int16x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15287,21 +16097,18 @@ pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15310,21 +16117,21 @@ pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe {
+        let v: int32x2_t = simd_shuffle!(v, v, [1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15333,21 +16140,18 @@ pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15356,21 +16160,21 @@ pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe {
+        let v: int8x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15379,62 +16183,41 @@ pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 0)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld1_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x4_t) -> float16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_insert!(src, LANE as u32, *ptr)
-}
-#[doc = "Load one single-element structure to one lane of one register"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 0)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld1q_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x8_t) -> float16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe {
+        let v: uint16x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 1)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15443,23 +16226,18 @@ pub unsafe fn vld1q_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x8_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_f32<const LANE: i32>(ptr: *const f32, src: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 3)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15468,23 +16246,21 @@ pub unsafe fn vld1_lane_f32<const LANE: i32>(ptr: *const f32, src: float32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_p16<const LANE: i32>(ptr: *const p16, src: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe {
+        let v: uint32x2_t = simd_shuffle!(v, v, [1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 7)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15493,23 +16269,18 @@ pub unsafe fn vld1_lane_p16<const LANE: i32>(ptr: *const p16, src: poly16x4_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_p8<const LANE: i32>(ptr: *const p8, src: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 3)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15518,23 +16289,21 @@ pub unsafe fn vld1_lane_p8<const LANE: i32>(ptr: *const p8, src: poly8x8_t) -> p
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_s16<const LANE: i32>(ptr: *const i16, src: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe {
+        let v: uint8x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 1)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15543,23 +16312,18 @@ pub unsafe fn vld1_lane_s16<const LANE: i32>(ptr: *const i16, src: int16x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_s32<const LANE: i32>(ptr: *const i32, src: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ldr, LANE = 0)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15568,23 +16332,21 @@ pub unsafe fn vld1_lane_s32<const LANE: i32>(ptr: *const i32, src: int32x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_s64<const LANE: i32>(ptr: *const i64, src: int64x1_t) -> int64x1_t {
-    static_assert!(LANE == 0);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe {
+        let v: float32x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 7)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15593,23 +16355,18 @@ pub unsafe fn vld1_lane_s64<const LANE: i32>(ptr: *const i64, src: int64x1_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_s8<const LANE: i32>(ptr: *const i8, src: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 3)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15618,23 +16375,21 @@ pub unsafe fn vld1_lane_s8<const LANE: i32>(ptr: *const i8, src: int8x8_t) -> in
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_u16<const LANE: i32>(ptr: *const u16, src: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe {
+        let v: poly16x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 1)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15643,23 +16398,18 @@ pub unsafe fn vld1_lane_u16<const LANE: i32>(ptr: *const u16, src: uint16x4_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_u32<const LANE: i32>(ptr: *const u32, src: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ldr, LANE = 0)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15668,23 +16418,21 @@ pub unsafe fn vld1_lane_u32<const LANE: i32>(ptr: *const u32, src: uint32x2_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_u64<const LANE: i32>(ptr: *const u64, src: uint64x1_t) -> uint64x1_t {
-    static_assert!(LANE == 0);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe {
+        let v: poly64x2_t = simd_shuffle!(v, v, [1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 7)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15693,23 +16441,18 @@ pub unsafe fn vld1_lane_u64<const LANE: i32>(ptr: *const u64, src: uint64x1_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_u8<const LANE: i32>(ptr: *const u8, src: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
+    static_assert_uimm_bits!(IMM5, 4);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 3)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15718,23 +16461,22 @@ pub unsafe fn vld1_lane_u8<const LANE: i32>(ptr: *const u8, src: uint8x8_t) -> u
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_f32<const LANE: i32>(ptr: *const f32, src: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
+    static_assert_uimm_bits!(IMM5, 4);
+    unsafe {
+        let v: poly8x16_t =
+            simd_shuffle!(v, v, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 7)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15743,23 +16485,18 @@ pub unsafe fn vld1q_lane_f32<const LANE: i32>(ptr: *const f32, src: float32x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_p16<const LANE: i32>(ptr: *const p16, src: poly16x8_t) -> poly16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 15)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15768,23 +16505,21 @@ pub unsafe fn vld1q_lane_p16<const LANE: i32>(ptr: *const p16, src: poly16x8_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_p8<const LANE: i32>(ptr: *const p8, src: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(LANE, 4);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe {
+        let v: int16x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 7)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15793,23 +16528,18 @@ pub unsafe fn vld1q_lane_p8<const LANE: i32>(ptr: *const p8, src: poly8x16_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_s16<const LANE: i32>(ptr: *const i16, src: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 3)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15818,23 +16548,21 @@ pub unsafe fn vld1q_lane_s16<const LANE: i32>(ptr: *const i16, src: int16x8_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_s32<const LANE: i32>(ptr: *const i32, src: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe {
+        let v: int32x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 1)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15843,23 +16571,18 @@ pub unsafe fn vld1q_lane_s32<const LANE: i32>(ptr: *const i32, src: int32x4_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_s64<const LANE: i32>(ptr: *const i64, src: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 15)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15868,23 +16591,21 @@ pub unsafe fn vld1q_lane_s64<const LANE: i32>(ptr: *const i64, src: int64x2_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_s8<const LANE: i32>(ptr: *const i8, src: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(LANE, 4);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe {
+        let v: int64x2_t = simd_shuffle!(v, v, [1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 7)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15893,23 +16614,18 @@ pub unsafe fn vld1q_lane_s8<const LANE: i32>(ptr: *const i8, src: int8x16_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_u16<const LANE: i32>(ptr: *const u16, src: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
+    static_assert_uimm_bits!(IMM5, 4);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 3)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15918,23 +16634,22 @@ pub unsafe fn vld1q_lane_u16<const LANE: i32>(ptr: *const u16, src: uint16x8_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_u32<const LANE: i32>(ptr: *const u32, src: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
+    static_assert_uimm_bits!(IMM5, 4);
+    unsafe {
+        let v: int8x16_t =
+            simd_shuffle!(v, v, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 1)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15943,23 +16658,18 @@ pub unsafe fn vld1q_lane_u32<const LANE: i32>(ptr: *const u32, src: uint32x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_u64<const LANE: i32>(ptr: *const u64, src: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 15)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15968,23 +16678,21 @@ pub unsafe fn vld1q_lane_u64<const LANE: i32>(ptr: *const u64, src: uint64x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_u8<const LANE: i32>(ptr: *const u8, src: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(LANE, 4);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
+    static_assert_uimm_bits!(IMM5, 3);
+    unsafe {
+        let v: uint16x8_t = simd_shuffle!(v, v, [7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u32)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ldr, LANE = 0)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15993,23 +16701,18 @@ pub unsafe fn vld1q_lane_u8<const LANE: i32>(ptr: *const u8, src: uint8x16_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_lane_p64<const LANE: i32>(ptr: *const p64, src: poly64x1_t) -> poly64x1_t {
-    static_assert!(LANE == 0);
-    simd_insert!(src, LANE as u32, *ptr)
+pub fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load one single-element structure to one lane of one register."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u32)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1, LANE = 1)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16018,40 +16721,21 @@ pub unsafe fn vld1_lane_p64<const LANE: i32>(ptr: *const p64, src: poly64x1_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_lane_p64<const LANE: i32>(ptr: *const p64, src: poly64x2_t) -> poly64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_insert!(src, LANE as u32, *ptr)
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,aes")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
-    let a: *const i8 = ptr as *const i8;
-    let b: i32 = crate::mem::align_of::<p64>() as i32;
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")]
-        fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t;
+pub fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe {
+        let v: uint32x4_t = simd_shuffle!(v, v, [3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
     }
-    transmute(_vld1_v1i64(a, b))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u64)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16060,21 +16744,18 @@ pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u64)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16083,21 +16764,21 @@ pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
+    static_assert_uimm_bits!(IMM5, 2);
+    unsafe {
+        let v: uint64x2_t = simd_shuffle!(v, v, [1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16106,21 +16787,18 @@ pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
+    static_assert_uimm_bits!(IMM5, 4);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16129,21 +16807,21 @@ pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
+    static_assert_uimm_bits!(IMM5, 4);
+    unsafe {
+        let v: uint8x16_t =
+            simd_shuffle!(v, v, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_extract!(v, IMM5 as u32)
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p64)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16152,21 +16830,17 @@ pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
+    static_assert!(IMM5 == 0);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s64)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16175,125 +16849,17 @@ pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
-    crate::ptr::read_unaligned(a.cast())
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
-pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
-    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
-    vld1_v8i8::<ALIGN>(ptr as *const i8)
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
-pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
-    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
-    vld1q_v16i8::<ALIGN>(ptr as *const i8)
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
-    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
-    vld1_v4i16::<ALIGN>(ptr as *const i8)
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
-    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
-    vld1q_v8i16::<ALIGN>(ptr as *const i8)
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
-    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
-    vld1_v2i32::<ALIGN>(ptr as *const i8)
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
-pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
-    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
-    vld1q_v4i32::<ALIGN>(ptr as *const i8)
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
-    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
-    vld1_v1i64::<ALIGN>(ptr as *const i8)
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
-pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
-    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
-    vld1q_v2i64::<ALIGN>(ptr as *const i8)
+pub fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
+    static_assert!(IMM5 == 0);
+    unsafe { simd_extract!(v, IMM5 as u32) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Move vector element to general-purpose register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16302,21 +16868,17 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
+    static_assert!(IMM5 == 0);
+    unsafe { simd_extract!(v, 0) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16325,21 +16887,16 @@ pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_f32(a: float32x4_t) -> float32x2_t {
+    unsafe { simd_shuffle!(a, a, [0, 1]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16348,21 +16905,20 @@ pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_f32(a: float32x4_t) -> float32x2_t {
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, a, [0, 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16371,21 +16927,16 @@ pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
+    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16394,21 +16945,20 @@ pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16417,21 +16967,16 @@ pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16440,21 +16985,21 @@ pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16463,21 +17008,16 @@ pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_s16(a: int16x8_t) -> int16x4_t {
+    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16486,21 +17026,20 @@ pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_s16(a: int16x8_t) -> int16x4_t {
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16509,21 +17048,16 @@ pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_s32(a: int32x4_t) -> int32x2_t {
+    unsafe { simd_shuffle!(a, a, [0, 1]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16532,21 +17066,20 @@ pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_s32(a: int32x4_t) -> int32x2_t {
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, a, [0, 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16555,21 +17088,16 @@ pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_s8(a: int8x16_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16578,21 +17106,21 @@ pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_s8(a: int8x16_t) -> int8x8_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16601,21 +17129,16 @@ pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
+    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16624,21 +17147,20 @@ pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16647,21 +17169,16 @@ pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
+    unsafe { simd_shuffle!(a, a, [0, 1]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16670,21 +17187,20 @@ pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, a, [0, 1]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16693,21 +17209,16 @@ pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16716,21 +17227,21 @@ pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16739,21 +17250,16 @@ pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
+    unsafe { transmute(u64x1::new(simd_extract!(a, 0))) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16762,21 +17268,19 @@ pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
+    unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(u64x1::new(simd_extract!(a, 0)))
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16785,21 +17289,16 @@ pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_s64(a: int64x2_t) -> int64x1_t {
+    unsafe { int64x1_t([simd_extract!(a, 0)]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16808,21 +17307,19 @@ pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_s64(a: int64x2_t) -> int64x1_t {
+    unsafe {
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        int64x1_t([simd_extract!(a, 0)])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16831,21 +17328,16 @@ pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
+    unsafe { uint64x1_t([simd_extract!(a, 0)]) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16854,20 +17346,21 @@ pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
+    unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        uint64x1_t([simd_extract!(a, 0)])
+    }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16877,20 +17370,26 @@ pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shadd.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")]
+        fn _vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vhadd_s8(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16900,20 +17399,26 @@ pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shadd.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")]
+        fn _vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    unsafe { _vhaddq_s8(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16923,20 +17428,26 @@ pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shadd.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")]
+        fn _vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vhadd_s16(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16946,20 +17457,26 @@ pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shadd.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")]
+        fn _vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    unsafe { _vhaddq_s16(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16969,20 +17486,26 @@ pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shadd.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")]
+        fn _vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vhadd_s32(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16992,20 +17515,26 @@ pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shadd.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")]
+        fn _vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    unsafe { _vhaddq_s32(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17015,20 +17544,26 @@ pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhadd.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")]
+        fn _vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+    }
+    unsafe { _vhadd_u8(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17038,20 +17573,26 @@ pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhadd.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")]
+        fn _vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+    }
+    unsafe { _vhaddq_u8(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17061,20 +17602,26 @@ pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhadd.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")]
+        fn _vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+    }
+    unsafe { _vhadd_u16(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17084,20 +17631,26 @@ pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhadd.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")]
+        fn _vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+    }
+    unsafe { _vhaddq_u16(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17107,20 +17660,26 @@ pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhadd.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")]
+        fn _vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vhadd_u32(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17130,20 +17689,26 @@ pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
-    crate::ptr::read_unaligned(a.cast())
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+pub fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhadd.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")]
+        fn _vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vhaddq_u32(a, b) }
+}
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17153,20 +17718,26 @@ pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shsub.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")]
+        fn _vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vhsub_s16(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17176,20 +17747,26 @@ pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shsub.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")]
+        fn _vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    unsafe { _vhsubq_s16(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17199,20 +17776,26 @@ pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shsub.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")]
+        fn _vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vhsub_s32(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17222,20 +17805,26 @@ pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shsub.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")]
+        fn _vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    unsafe { _vhsubq_s32(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17245,20 +17834,26 @@ pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shsub.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")]
+        fn _vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vhsub_s8(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17268,20 +17863,26 @@ pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shsub.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")]
+        fn _vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    unsafe { _vhsubq_s8(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(uhsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17291,20 +17892,26 @@ pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")]
+        fn _vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+    }
+    unsafe { _vhsub_u8(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(uhsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17314,20 +17921,26 @@ pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")]
+        fn _vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+    }
+    unsafe { _vhsubq_u8(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(uhsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17337,20 +17950,153 @@ pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")]
+        fn _vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+    }
+    unsafe { _vhsub_u16(a, b) }
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uhsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")]
+        fn _vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+    }
+    unsafe { _vhsubq_u16(a, b) }
+}
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uhsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")]
+        fn _vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vhsub_u32(a, b) }
+}
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uhsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")]
+        fn _vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vhsubq_u32(a, b) }
+}
+#[doc = "Load one single-element structure and replicate to all lanes of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
+)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
+    let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0)));
+    simd_shuffle!(x, x, [0, 0, 0, 0])
+}
+#[doc = "Load one single-element structure and replicate to all lanes of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1r)
+)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t {
+    let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0)));
+    simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
+}
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17360,20 +18106,20 @@ pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t {
+    transmute(f32x2::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17383,20 +18129,20 @@ pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t {
+    transmute(u16x4::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17406,20 +18152,20 @@ pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t {
+    transmute(u8x8::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17429,20 +18175,20 @@ pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t {
+    transmute(i16x4::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17452,20 +18198,20 @@ pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t {
+    transmute(i32x2::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17475,20 +18221,20 @@ pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t {
+    transmute(i8x8::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17498,20 +18244,20 @@ pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t {
+    transmute(u16x4::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17521,20 +18267,20 @@ pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t {
+    transmute(u32x2::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17544,20 +18290,20 @@ pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t {
+    transmute(u8x8::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17567,20 +18313,20 @@ pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t {
+    transmute(f32x4::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17590,20 +18336,20 @@ pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t {
+    transmute(u16x8::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17613,20 +18359,20 @@ pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t {
+    transmute(u8x16::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17636,20 +18382,20 @@ pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t {
+    transmute(i16x8::splat(*ptr))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"]
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17659,568 +18405,591 @@ pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t {
-    crate::ptr::read_unaligned(a.cast())
+pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t {
+    transmute(i32x4::splat(*ptr))
 }
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[rustc_legacy_const_generics(1)]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1_v1i64<const ALIGN: i32>(a: *const i8) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")]
-        fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t;
-    }
-    _vld1_v1i64(a, ALIGN)
-}
-#[inline]
-#[rustc_legacy_const_generics(1)]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1_v2f32<const ALIGN: i32>(a: *const i8) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")]
-        fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t;
-    }
-    _vld1_v2f32(a, ALIGN)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t {
+    transmute(i64x2::splat(*ptr))
 }
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[rustc_legacy_const_generics(1)]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1_v2i32<const ALIGN: i32>(a: *const i8) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")]
-        fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t;
-    }
-    _vld1_v2i32(a, ALIGN)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t {
+    transmute(i8x16::splat(*ptr))
 }
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[rustc_legacy_const_generics(1)]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1_v4i16<const ALIGN: i32>(a: *const i8) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")]
-        fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t;
-    }
-    _vld1_v4i16(a, ALIGN)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t {
+    transmute(u16x8::splat(*ptr))
 }
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[rustc_legacy_const_generics(1)]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1_v8i8<const ALIGN: i32>(a: *const i8) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")]
-        fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t;
-    }
-    _vld1_v8i8(a, ALIGN)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t {
+    transmute(u32x4::splat(*ptr))
 }
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[rustc_legacy_const_generics(1)]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1q_v16i8<const ALIGN: i32>(a: *const i8) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")]
-        fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t;
-    }
-    _vld1q_v16i8(a, ALIGN)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t {
+    transmute(u64x2::splat(*ptr))
 }
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[rustc_legacy_const_generics(1)]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1q_v2i64<const ALIGN: i32>(a: *const i8) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")]
-        fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t;
-    }
-    _vld1q_v2i64(a, ALIGN)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t {
+    transmute(u8x16::splat(*ptr))
 }
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[rustc_legacy_const_generics(1)]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1q_v4f32<const ALIGN: i32>(a: *const i8) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")]
-        fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t;
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ldr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t {
+    let x: poly64x1_t;
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = crate::core_arch::aarch64::vld1_p64(ptr);
     }
-    _vld1q_v4f32(a, ALIGN)
+    #[cfg(target_arch = "arm")]
+    {
+        x = crate::core_arch::arm::vld1_p64(ptr);
+    };
+    x
 }
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[rustc_legacy_const_generics(1)]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1q_v4i32<const ALIGN: i32>(a: *const i8) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")]
-        fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t;
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ldr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t {
+    let x: int64x1_t;
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = crate::core_arch::aarch64::vld1_s64(ptr);
     }
-    _vld1q_v4i32(a, ALIGN)
+    #[cfg(target_arch = "arm")]
+    {
+        x = crate::core_arch::arm::vld1_s64(ptr);
+    };
+    x
 }
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[rustc_legacy_const_generics(1)]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1q_v8i16<const ALIGN: i32>(a: *const i8) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")]
-        fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t;
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ldr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t {
+    let x: uint64x1_t;
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = crate::core_arch::aarch64::vld1_u64(ptr);
     }
-    _vld1q_v8i16(a, ALIGN)
+    #[cfg(target_arch = "arm")]
+    {
+        x = crate::core_arch::arm::vld1_u64(ptr);
+    };
+    x
 }
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")]
-        fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t;
-    }
-    _vld1_v4f16(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
+    transmute(vld1_v4f16(
+        ptr as *const i8,
+        crate::mem::align_of::<f16>() as i32,
+    ))
 }
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")]
-        fn _vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t;
-    }
-    _vld1q_v8f16(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
+    transmute(vld1q_v8f16(
+        ptr as *const i8,
+        crate::mem::align_of::<f16>() as i32,
+    ))
 }
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    assert_instr(ld)
 )]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t {
+    crate::ptr::read_unaligned(a.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
 )]
-pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t {
-    let x = vld1q_lane_p64::<0>(ptr, transmute(u64x2::splat(0)));
-    simd_shuffle!(x, x, [0, 0])
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0")]
-        fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t;
-    }
-    _vld2_dup_f16(a as _, 2)
+pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0")]
-        fn _vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t;
-    }
-    _vld2q_dup_f16(a as _, 2)
+pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld)
 )]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4f16.p0"
-        )]
-        fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t;
-    }
-    _vld2_dup_f16(a as _)
+pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld)
 )]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v8f16.p0"
-        )]
-        fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t;
-    }
-    _vld2q_dup_f16(a as _)
+pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0")]
-        fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
-    }
-    _vld2_dup_f32(a as *const i8, 4)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
+    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
+    transmute(vld1_v2f32::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0")]
-        fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
-    }
-    _vld2q_dup_f32(a as *const i8, 4)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
+    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
+    transmute(vld1q_v4f32::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0")]
-        fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
-    }
-    _vld2_dup_s8(a as *const i8, 1)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
+    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
+    transmute(vld1_v8i8::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0")]
-        fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
-    }
-    _vld2q_dup_s8(a as *const i8, 1)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
+    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
+    transmute(vld1q_v16i8::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0")]
-        fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
-    }
-    _vld2_dup_s16(a as *const i8, 2)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
+    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
+    transmute(vld1_v4i16::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0")]
-        fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
-    }
-    _vld2q_dup_s16(a as *const i8, 2)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
+    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
+    transmute(vld1q_v8i16::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0")]
-        fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
-    }
-    _vld2_dup_s32(a as *const i8, 4)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
+    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
+    transmute(vld1_v2i32::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0")]
-        fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
-    }
-    _vld2q_dup_s32(a as *const i8, 4)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
+    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
+    transmute(vld1q_v4i32::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v2f32.p0"
-        )]
-        fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t;
-    }
-    _vld2_dup_f32(a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
+    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
+    transmute(vld1_v1i64::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4f32.p0"
-        )]
-        fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t;
-    }
-    _vld2q_dup_f32(a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
+pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
+    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
+    transmute(vld1q_v2i64::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v8i8.p0"
-        )]
-        fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t;
-    }
-    _vld2_dup_s8(a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
+    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
+    transmute(vld1_v8i8::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v16i8.p0"
-        )]
-        fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t;
-    }
-    _vld2q_dup_s8(a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
+    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
+    transmute(vld1q_v16i8::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4i16.p0"
-        )]
-        fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t;
-    }
-    _vld2_dup_s16(a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
+    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
+    transmute(vld1_v4i16::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v8i16.p0"
-        )]
-        fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t;
-    }
-    _vld2q_dup_s16(a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
+    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
+    transmute(vld1q_v8i16::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v2i32.p0"
-        )]
-        fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t;
-    }
-    _vld2_dup_s32(a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
+pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
+    const ALIGN: i32 = crate::mem::align_of::<p64>() as i32;
+    transmute(vld1q_v2i64::<ALIGN>(ptr as *const i8))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4i32.p0"
-        )]
-        fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t;
-    }
-    _vld2q_dup_s32(a as _)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18230,55 +18999,20 @@ pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t {
-    transmute(vld2_dup_s64(transmute(a)))
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0")]
-        fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
-    }
-    _vld2_dup_s64(a as *const i8, 8)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v1i64.p0"
-        )]
-        fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t;
-    }
-    _vld2_dup_s64(a as _)
+pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18288,20 +19022,20 @@ pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t {
-    transmute(vld2_dup_s64(transmute(a)))
+pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18311,20 +19045,20 @@ pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
-    transmute(vld2_dup_s8(transmute(a)))
+pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18334,20 +19068,20 @@ pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
-    transmute(vld2q_dup_s8(transmute(a)))
+pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18357,20 +19091,20 @@ pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
-    transmute(vld2_dup_s16(transmute(a)))
+pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18380,20 +19114,61 @@ pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
-    transmute(vld2q_dup_s16(transmute(a)))
+pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"]
+#[doc = "Load one single-element structure to one lane of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld1_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x4_t) -> float16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_insert!(src, LANE as u32, *ptr)
+}
+#[doc = "Load one single-element structure to one lane of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld1q_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x8_t) -> float16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_insert!(src, LANE as u32, *ptr)
+}
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18403,20 +19178,22 @@ pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
-    transmute(vld2_dup_s32(transmute(a)))
+pub unsafe fn vld1_lane_f32<const LANE: i32>(ptr: *const f32, src: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1, LANE = 3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18426,20 +19203,22 @@ pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
-    transmute(vld2q_dup_s32(transmute(a)))
+pub unsafe fn vld1_lane_p16<const LANE: i32>(ptr: *const p16, src: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1, LANE = 7)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18449,20 +19228,22 @@ pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
-    transmute(vld2_dup_s8(transmute(a)))
+pub unsafe fn vld1_lane_p8<const LANE: i32>(ptr: *const p8, src: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1, LANE = 3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18472,20 +19253,22 @@ pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
-    transmute(vld2q_dup_s8(transmute(a)))
+pub unsafe fn vld1_lane_s16<const LANE: i32>(ptr: *const i16, src: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1, LANE = 1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18495,20 +19278,22 @@ pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
-    transmute(vld2_dup_s16(transmute(a)))
+pub unsafe fn vld1_lane_s32<const LANE: i32>(ptr: *const i32, src: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ldr, LANE = 0)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18518,809 +19303,778 @@ pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
-    transmute(vld2q_dup_s16(transmute(a)))
+pub unsafe fn vld1_lane_s64<const LANE: i32>(ptr: *const i64, src: int64x1_t) -> int64x1_t {
+    static_assert!(LANE == 0);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"]
-#[doc = "## Safety"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s8)"]
+#[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0")]
-        fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t;
-    }
-    _vld2_f16(a as _, 2)
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 7)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_lane_s8<const LANE: i32>(ptr: *const i8, src: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0")]
-        fn _vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t;
-    }
-    _vld2q_f16(a as _, 2)
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_lane_u16<const LANE: i32>(ptr: *const u16, src: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1, LANE = 1)
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4f16.p0"
-        )]
-        fn _vld2_f16(ptr: *const f16) -> float16x4x2_t;
-    }
-    _vld2_f16(a as _)
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_lane_u32<const LANE: i32>(ptr: *const u32, src: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ldr, LANE = 0)
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v8f16.p0"
-        )]
-        fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t;
-    }
-    _vld2q_f16(a as _)
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_lane_u64<const LANE: i32>(ptr: *const u64, src: uint64x1_t) -> uint64x1_t {
+    static_assert!(LANE == 0);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32")]
-        fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
-    }
-    _vld2_f32(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 7)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_lane_u8<const LANE: i32>(ptr: *const u8, src: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32")]
-        fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
-    }
-    _vld2q_f32(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_f32<const LANE: i32>(ptr: *const f32, src: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8")]
-        fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
-    }
-    _vld2_s8(a as *const i8, 1)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 7)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_p16<const LANE: i32>(ptr: *const p16, src: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8")]
-        fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
-    }
-    _vld2q_s8(a as *const i8, 1)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 15)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_p8<const LANE: i32>(ptr: *const p8, src: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16")]
-        fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
-    }
-    _vld2_s16(a as *const i8, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 7)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_s16<const LANE: i32>(ptr: *const i16, src: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16")]
-        fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
-    }
-    _vld2q_s16(a as *const i8, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_s32<const LANE: i32>(ptr: *const i32, src: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32")]
-        fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
-    }
-    _vld2_s32(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_s64<const LANE: i32>(ptr: *const i64, src: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32")]
-        fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
-    }
-    _vld2q_s32(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 15)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_s8<const LANE: i32>(ptr: *const i8, src: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v2f32.p0"
-        )]
-        fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t;
-    }
-    _vld2_f32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 7)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_u16<const LANE: i32>(ptr: *const u16, src: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4f32.p0"
-        )]
-        fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t;
-    }
-    _vld2q_f32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_u32<const LANE: i32>(ptr: *const u32, src: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v8i8.p0"
-        )]
-        fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t;
-    }
-    _vld2_s8(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_u64<const LANE: i32>(ptr: *const u64, src: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v16i8.p0"
-        )]
-        fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t;
-    }
-    _vld2q_s8(a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 15)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_u8<const LANE: i32>(ptr: *const u8, src: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    simd_insert!(src, LANE as u32, *ptr)
+}
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4i16.p0"
-        )]
-        fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t;
-    }
-    _vld2_s16(a as _)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ldr, LANE = 0)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_lane_p64<const LANE: i32>(ptr: *const p64, src: poly64x1_t) -> poly64x1_t {
+    static_assert!(LANE == 0);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
+#[doc = "Load one single-element structure to one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v8i16.p0"
-        )]
-        fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t;
-    }
-    _vld2q_s16(a as _)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1, LANE = 1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_lane_p64<const LANE: i32>(ptr: *const p64, src: poly64x2_t) -> poly64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_insert!(src, LANE as u32, *ptr)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
+    let a: *const i8 = ptr as *const i8;
+    let b: i32 = crate::mem::align_of::<p64>() as i32;
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v2i32.p0"
-        )]
-        fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")]
+        fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t;
     }
-    _vld2_s32(a as _)
+    transmute(_vld1_v1i64(a, b))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4i32.p0"
-        )]
-        fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t;
-    }
-    _vld2q_s32(a as _)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")]
-        fn _vld2_lane_f16(
-            ptr: *const f16,
-            a: float16x4_t,
-            b: float16x4_t,
-            n: i32,
-            size: i32,
-        ) -> float16x4x2_t;
-    }
-    _vld2_lane_f16(a as _, b.0, b.1, LANE, 2)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")]
-        fn _vld2q_lane_f16(
-            ptr: *const f16,
-            a: float16x8_t,
-            b: float16x8_t,
-            n: i32,
-            size: i32,
-        ) -> float16x8x2_t;
-    }
-    _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld)
 )]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0"
-        )]
-        fn _vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16)
-            -> float16x4x2_t;
-    }
-    _vld2_lane_f16(b.0, b.1, LANE as i64, a as _)
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld)
 )]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0"
-        )]
-        fn _vld2q_lane_f16(
-            a: float16x8_t,
-            b: float16x8_t,
-            n: i64,
-            ptr: *const f16,
-        ) -> float16x8x2_t;
-    }
-    _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _)
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0"
-        )]
-        fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t;
-    }
-    _vld2_lane_f32(b.0, b.1, LANE as i64, a as _)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0"
-        )]
-        fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8)
-            -> float32x4x2_t;
-    }
-    _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
+    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
+    vld1_v8i8::<ALIGN>(ptr as *const i8)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0"
-        )]
-        fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t;
-    }
-    _vld2_lane_s8(b.0, b.1, LANE as i64, a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
+    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
+    vld1q_v16i8::<ALIGN>(ptr as *const i8)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0"
-        )]
-        fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t;
-    }
-    _vld2_lane_s16(b.0, b.1, LANE as i64, a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
+    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
+    vld1_v4i16::<ALIGN>(ptr as *const i8)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0"
-        )]
-        fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t;
-    }
-    _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
+    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
+    vld1q_v8i16::<ALIGN>(ptr as *const i8)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0"
-        )]
-        fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t;
-    }
-    _vld2_lane_s32(b.0, b.1, LANE as i64, a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
+    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
+    vld1_v2i32::<ALIGN>(ptr as *const i8)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0"
-        )]
-        fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t;
-    }
-    _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
+    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
+    vld1q_v4i32::<ALIGN>(ptr as *const i8)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0")]
-        fn _vld2_lane_f32(
-            ptr: *const i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            n: i32,
-            size: i32,
-        ) -> float32x2x2_t;
-    }
-    _vld2_lane_f32(a as _, b.0, b.1, LANE, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0")]
-        fn _vld2q_lane_f32(
-            ptr: *const i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            n: i32,
-            size: i32,
-        ) -> float32x4x2_t;
-    }
-    _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0")]
-        fn _vld2q_lane_s16(
-            ptr: *const i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            n: i32,
-            size: i32,
-        ) -> int16x8x2_t;
-    }
-    _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
+    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
+    vld1_v1i64::<ALIGN>(ptr as *const i8)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0")]
-        fn _vld2q_lane_s32(
-            ptr: *const i8,
-            a: int32x4_t,
-            b: int32x4_t,
-            n: i32,
-            size: i32,
-        ) -> int32x4x2_t;
-    }
-    _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
 #[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0")]
-        fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32)
-            -> int8x8x2_t;
-    }
-    _vld2_lane_s8(a as _, b.0, b.1, LANE, 1)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
+pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
+    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
+    vld1q_v2i64::<ALIGN>(ptr as *const i8)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0")]
-        fn _vld2_lane_s16(
-            ptr: *const i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            n: i32,
-            size: i32,
-        ) -> int16x4x2_t;
-    }
-    _vld2_lane_s16(a as _, b.0, b.1, LANE, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0")]
-        fn _vld2_lane_s32(
-            ptr: *const i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            n: i32,
-            size: i32,
-        ) -> int32x2x2_t;
-    }
-    _vld2_lane_s32(a as _, b.0, b.1, LANE, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19329,23 +20083,21 @@ pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> i
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19354,23 +20106,21 @@ pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uin
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19379,23 +20129,21 @@ pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19404,23 +20152,21 @@ pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld2_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19429,23 +20175,21 @@ pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2q_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19454,23 +20198,21 @@ pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19479,23 +20221,21 @@ pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> pol
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19504,21 +20244,20 @@ pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19528,55 +20267,66 @@ pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t {
-    transmute(vld2_s64(transmute(a)))
+pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64")]
-        fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
-    }
-    _vld2_s64(a as *const i8, 8)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v1i64.p0"
-        )]
-        fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t;
-    }
-    _vld2_s64(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19586,20 +20336,20 @@ pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t {
-    transmute(vld2_s64(transmute(a)))
+pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19609,20 +20359,20 @@ pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t {
-    transmute(vld2_s8(transmute(a)))
+pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19632,20 +20382,20 @@ pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
-    transmute(vld2q_s8(transmute(a)))
+pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19655,20 +20405,20 @@ pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t {
-    transmute(vld2_s16(transmute(a)))
+pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19678,20 +20428,20 @@ pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
-    transmute(vld2q_s16(transmute(a)))
+pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19701,20 +20451,20 @@ pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t {
-    transmute(vld2_s32(transmute(a)))
+pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19724,20 +20474,20 @@ pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
-    transmute(vld2q_s32(transmute(a)))
+pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19747,20 +20497,20 @@ pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
-    transmute(vld2_s8(transmute(a)))
+pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19770,20 +20520,20 @@ pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
-    transmute(vld2q_s8(transmute(a)))
+pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19793,20 +20543,20 @@ pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
-    transmute(vld2_s16(transmute(a)))
+pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19816,405 +20566,43 @@ pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t {
-    transmute(vld2q_s16(transmute(a)))
+pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0")]
-        fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t;
-    }
-    _vld3_dup_f16(a as _, 2)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0")]
-        fn _vld3q_dup_f16(ptr: *const f16, size: i32) -> float16x8x3_t;
-    }
-    _vld3q_dup_f16(a as _, 2)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4f16.p0"
-        )]
-        fn _vld3_dup_f16(ptr: *const f16) -> float16x4x3_t;
-    }
-    _vld3_dup_f16(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v8f16.p0"
-        )]
-        fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t;
-    }
-    _vld3q_dup_f16(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v2f32.p0"
-        )]
-        fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t;
-    }
-    _vld3_dup_f32(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4f32.p0"
-        )]
-        fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t;
-    }
-    _vld3q_dup_f32(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v8i8.p0"
-        )]
-        fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t;
-    }
-    _vld3_dup_s8(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v16i8.p0"
-        )]
-        fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t;
-    }
-    _vld3q_dup_s8(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4i16.p0"
-        )]
-        fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t;
-    }
-    _vld3_dup_s16(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v8i16.p0"
-        )]
-        fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t;
-    }
-    _vld3q_dup_s16(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v2i32.p0"
-        )]
-        fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t;
-    }
-    _vld3_dup_s32(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4i32.p0"
-        )]
-        fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t;
-    }
-    _vld3q_dup_s32(a as _)
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v1i64.p0"
-        )]
-        fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t;
-    }
-    _vld3_dup_s64(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0")]
-        fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
-    }
-    _vld3_dup_f32(a as *const i8, 4)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0")]
-        fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
-    }
-    _vld3q_dup_f32(a as *const i8, 4)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0")]
-        fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
-    }
-    _vld3_dup_s8(a as *const i8, 1)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0")]
-        fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
-    }
-    _vld3q_dup_s8(a as *const i8, 1)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0")]
-        fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
-    }
-    _vld3_dup_s16(a as *const i8, 2)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0")]
-        fn _vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
-    }
-    _vld3q_dup_s16(a as *const i8, 2)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0")]
-        fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
-    }
-    _vld3_dup_s32(a as *const i8, 4)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0")]
-        fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
-    }
-    _vld3q_dup_s32(a as *const i8, 4)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20224,36 +20612,20 @@ pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t {
-    transmute(vld3_dup_s64(transmute(a)))
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0")]
-        fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
-    }
-    _vld3_dup_s64(a as *const i8, 8)
+pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20263,20 +20635,20 @@ pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t {
-    transmute(vld3_dup_s64(transmute(a)))
+pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20286,20 +20658,20 @@ pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
-    transmute(vld3_dup_s8(transmute(a)))
+pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20309,20 +20681,20 @@ pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
-    transmute(vld3q_dup_s8(transmute(a)))
+pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20332,20 +20704,20 @@ pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
-    transmute(vld3_dup_s16(transmute(a)))
+pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20355,20 +20727,20 @@ pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
-    transmute(vld3q_dup_s16(transmute(a)))
+pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20378,20 +20750,20 @@ pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
-    transmute(vld3_dup_s32(transmute(a)))
+pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20401,20 +20773,20 @@ pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
-    transmute(vld3q_dup_s32(transmute(a)))
+pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20424,20 +20796,20 @@ pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
-    transmute(vld3_dup_s8(transmute(a)))
+pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20447,20 +20819,20 @@ pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
-    transmute(vld3q_dup_s8(transmute(a)))
+pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20470,20 +20842,20 @@ pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
-    transmute(vld3_dup_s16(transmute(a)))
+pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20493,361 +20865,760 @@ pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
-    transmute(vld3q_dup_s16(transmute(a)))
+pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0")]
-        fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t;
-    }
-    _vld3_f16(a as _, 2)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0")]
-        fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t;
-    }
-    _vld3q_f16(a as _, 2)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld)
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
-    crate::core_arch::macros::deinterleaving_load!(f16, 4, 3, a)
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld)
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
-    crate::core_arch::macros::deinterleaving_load!(f16, 8, 3, a)
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
-    crate::core_arch::macros::deinterleaving_load!(f32, 2, 3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
-    crate::core_arch::macros::deinterleaving_load!(f32, 4, 3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
-    crate::core_arch::macros::deinterleaving_load!(i8, 8, 3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
-    crate::core_arch::macros::deinterleaving_load!(i8, 16, 3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
-    crate::core_arch::macros::deinterleaving_load!(i16, 4, 3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
-    crate::core_arch::macros::deinterleaving_load!(i16, 8, 3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
-    crate::core_arch::macros::deinterleaving_load!(i32, 2, 3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
-    crate::core_arch::macros::deinterleaving_load!(i32, 4, 3, a)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0")]
-        fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
-    }
-    _vld3_f32(a as *const i8, 4)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0")]
-        fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
-    }
-    _vld3q_f32(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0")]
-        fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
-    }
-    _vld3_s8(a as *const i8, 1)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0")]
-        fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
-    }
-    _vld3q_s8(a as *const i8, 1)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0")]
-        fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
-    }
-    _vld3_s16(a as *const i8, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0")]
-        fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
-    }
-    _vld3q_s16(a as *const i8, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
+    crate::ptr::read_unaligned(a.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
+    crate::ptr::read_unaligned(a.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
+    crate::ptr::read_unaligned(a.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
+    crate::ptr::read_unaligned(a.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
+    crate::ptr::read_unaligned(a.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t {
+    crate::ptr::read_unaligned(a.cast())
+}
+#[inline]
+#[rustc_legacy_const_generics(1)]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
+unsafe fn vld1_v1i64<const ALIGN: i32>(a: *const i8) -> int64x1_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0")]
-        fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")]
+        fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t;
     }
-    _vld3_s32(a as *const i8, 4)
+    _vld1_v1i64(a, ALIGN)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[rustc_legacy_const_generics(1)]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
+unsafe fn vld1_v2f32<const ALIGN: i32>(a: *const i8) -> float32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0")]
-        fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")]
+        fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t;
     }
-    _vld3q_s32(a as *const i8, 4)
+    _vld1_v2f32(a, ALIGN)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"]
+#[inline]
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v2i32<const ALIGN: i32>(a: *const i8) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")]
+        fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t;
+    }
+    _vld1_v2i32(a, ALIGN)
+}
+#[inline]
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v4i16<const ALIGN: i32>(a: *const i8) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")]
+        fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t;
+    }
+    _vld1_v4i16(a, ALIGN)
+}
+#[inline]
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v8i8<const ALIGN: i32>(a: *const i8) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")]
+        fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t;
+    }
+    _vld1_v8i8(a, ALIGN)
+}
+#[inline]
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v16i8<const ALIGN: i32>(a: *const i8) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")]
+        fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t;
+    }
+    _vld1q_v16i8(a, ALIGN)
+}
+#[inline]
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v2i64<const ALIGN: i32>(a: *const i8) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")]
+        fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t;
+    }
+    _vld1q_v2i64(a, ALIGN)
+}
+#[inline]
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v4f32<const ALIGN: i32>(a: *const i8) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")]
+        fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t;
+    }
+    _vld1q_v4f32(a, ALIGN)
+}
+#[inline]
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v4i32<const ALIGN: i32>(a: *const i8) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")]
+        fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t;
+    }
+    _vld1q_v4i32(a, ALIGN)
+}
+#[inline]
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v8i16<const ALIGN: i32>(a: *const i8) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")]
+        fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t;
+    }
+    _vld1q_v8i16(a, ALIGN)
+}
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg(not(target_arch = "arm64ec"))]
+unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")]
+        fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t;
+    }
+    _vld1_v4f16(a, b)
+}
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg(not(target_arch = "arm64ec"))]
+unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")]
+        fn _vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t;
+    }
+    _vld1q_v8f16(a, b)
+}
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t {
+    let x = vld1q_lane_p64::<0>(ptr, transmute(u64x2::splat(0)));
+    simd_shuffle!(x, x, [0, 0])
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) -> float16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")]
-        fn _vld3_lane_f16(
-            ptr: *const f16,
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            n: i32,
-            size: i32,
-        ) -> float16x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0")]
+        fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t;
     }
-    _vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2)
+    _vld2_dup_f16(a as _, 2)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -> float16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
+pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")]
-        fn _vld3q_lane_f16(
-            ptr: *const f16,
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            n: i32,
-            size: i32,
-        ) -> float16x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0")]
+        fn _vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t;
     }
-    _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2)
+    _vld2q_dup_f16(a as _, 2)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
@@ -20855,31 +21626,23 @@ pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) -> float16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0"
+            link_name = "llvm.aarch64.neon.ld2r.v4f16.p0"
         )]
-        fn _vld3_lane_f16(
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            n: i64,
-            ptr: *const f16,
-        ) -> float16x4x3_t;
+        fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t;
     }
-    _vld3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2_dup_f16(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
@@ -20887,381 +21650,371 @@ pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) ->
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -> float16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
+pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0"
+            link_name = "llvm.aarch64.neon.ld2r.v8f16.p0"
         )]
-        fn _vld3q_lane_f16(
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            n: i64,
-            ptr: *const f16,
-        ) -> float16x8x3_t;
+        fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t;
     }
-    _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2q_dup_f16(a as _)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0"
-        )]
-        fn _vld3_lane_f32(
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0")]
+        fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
     }
-    _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0"
-        )]
-        fn _vld3q_lane_f32(
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0")]
+        fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
     }
-    _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2q_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0")]
-        fn _vld3_lane_f32(
-            ptr: *const i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            n: i32,
-            size: i32,
-        ) -> float32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0")]
+        fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
     }
-    _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vld2_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0")]
+        fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
+    }
+    _vld2q_dup_s8(a as *const i8, 1)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0")]
+        fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
+    }
+    _vld2_dup_s16(a as *const i8, 2)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0")]
+        fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
+    }
+    _vld2q_dup_s16(a as *const i8, 2)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0")]
+        fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
+    }
+    _vld2_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0")]
+        fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
+    }
+    _vld2q_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0"
+            link_name = "llvm.aarch64.neon.ld2r.v2f32.p0"
         )]
-        fn _vld3_lane_s8(
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int8x8x3_t;
+        fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t;
     }
-    _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2_dup_f32(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0"
+            link_name = "llvm.aarch64.neon.ld2r.v4f32.p0"
         )]
-        fn _vld3_lane_s16(
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x4x3_t;
+        fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t;
     }
-    _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2q_dup_f32(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
-    static_assert_uimm_bits!(LANE, 4);
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0"
+            link_name = "llvm.aarch64.neon.ld2r.v8i8.p0"
         )]
-        fn _vld3q_lane_s16(
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x8x3_t;
+        fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t;
     }
-    _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2_dup_s8(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0"
+            link_name = "llvm.aarch64.neon.ld2r.v16i8.p0"
         )]
-        fn _vld3_lane_s32(
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x2x3_t;
+        fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t;
     }
-    _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2q_dup_s8(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0"
+            link_name = "llvm.aarch64.neon.ld2r.v4i16.p0"
         )]
-        fn _vld3q_lane_s32(
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x4x3_t;
+        fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t;
     }
-    _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2_dup_s16(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0")]
-        fn _vld3_lane_s8(
-            ptr: *const i8,
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            n: i32,
-            size: i32,
-        ) -> int8x8x3_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v8i16.p0"
+        )]
+        fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t;
     }
-    _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1)
+    _vld2q_dup_s16(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0")]
-        fn _vld3_lane_s16(
-            ptr: *const i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            n: i32,
-            size: i32,
-        ) -> int16x4x3_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v2i32.p0"
+        )]
+        fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t;
     }
-    _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+    _vld2_dup_s32(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0")]
-        fn _vld3q_lane_s16(
-            ptr: *const i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            n: i32,
-            size: i32,
-        ) -> int16x8x3_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v4i32.p0"
+        )]
+        fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t;
     }
-    _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+    _vld2q_dup_s32(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t {
+    transmute(vld2_dup_s64(transmute(a)))
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0")]
-        fn _vld3_lane_s32(
-            ptr: *const i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            n: i32,
-            size: i32,
-        ) -> int32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0")]
+        fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
     }
-    _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vld2_dup_s64(a as *const i8, 8)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0")]
-        fn _vld3q_lane_s32(
-            ptr: *const i8,
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            n: i32,
-            size: i32,
-        ) -> int32x4x3_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v1i64.p0"
+        )]
+        fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t;
     }
-    _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vld2_dup_s64(a as _)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21270,23 +22023,21 @@ pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t {
+    transmute(vld2_dup_s64(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21295,23 +22046,21 @@ pub unsafe fn vld3_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x3_t) -> uin
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
+    transmute(vld2_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21320,23 +22069,21 @@ pub unsafe fn vld3_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
+    transmute(vld2q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21345,23 +22092,21 @@ pub unsafe fn vld3q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld3_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
+    transmute(vld2_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21370,23 +22115,21 @@ pub unsafe fn vld3_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld3q_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
+    transmute(vld2q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21395,23 +22138,21 @@ pub unsafe fn vld3q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
+    transmute(vld2_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21420,23 +22161,21 @@ pub unsafe fn vld3_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x3_t) -> pol
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
+    transmute(vld2q_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21445,21 +22184,20 @@ pub unsafe fn vld3_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
+    transmute(vld2_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21469,232 +22207,20 @@ pub unsafe fn vld3q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t {
-    transmute(vld3_s64(transmute(a)))
+pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
+    transmute(vld2q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
-    crate::ptr::read_unaligned(a.cast())
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0")]
-        fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
-    }
-    _vld3_s64(a as *const i8, 8)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t {
-    transmute(vld3_s64(transmute(a)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t {
-    transmute(vld3_s8(transmute(a)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t {
-    transmute(vld3q_s8(transmute(a)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t {
-    transmute(vld3_s16(transmute(a)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t {
-    transmute(vld3q_s16(transmute(a)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t {
-    transmute(vld3_s32(transmute(a)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t {
-    transmute(vld3q_s32(transmute(a)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t {
-    transmute(vld3_s8(transmute(a)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21704,20 +22230,20 @@ pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t {
-    transmute(vld3q_s8(transmute(a)))
+pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
+    transmute(vld2_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21727,1533 +22253,1704 @@ pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t {
-    transmute(vld3_s16(transmute(a)))
+pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
+    transmute(vld2q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t {
-    transmute(vld3q_s16(transmute(a)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0")]
-        fn _vld3q_lane_f32(
-            ptr: *const i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            n: i32,
-            size: i32,
-        ) -> float32x4x3_t;
-    }
-    _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
+pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0")]
-        fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0")]
+        fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t;
     }
-    _vld4_dup_f16(a as _, 2)
+    _vld2_f16(a as _, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
+pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0")]
-        fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0")]
+        fn _vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t;
     }
-    _vld4q_dup_f16(a as _, 2)
+    _vld2q_f16(a as _, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
+pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4f16.p0"
+            link_name = "llvm.aarch64.neon.ld2.v4f16.p0"
         )]
-        fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t;
+        fn _vld2_f16(ptr: *const f16) -> float16x4x2_t;
     }
-    _vld4_dup_f16(a as _)
+    _vld2_f16(a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
+pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v8f16.p0"
+            link_name = "llvm.aarch64.neon.ld2.v8f16.p0"
         )]
-        fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t;
+        fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t;
     }
-    _vld4q_dup_f16(a as _)
+    _vld2q_f16(a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0")]
-        fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32")]
+        fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
     }
-    _vld4_dup_f32(a as *const i8, 4)
+    _vld2_f32(a as *const i8, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0")]
-        fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32")]
+        fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
     }
-    _vld4q_dup_f32(a as *const i8, 4)
+    _vld2q_f32(a as *const i8, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0")]
-        fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8")]
+        fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
     }
-    _vld4_dup_s8(a as *const i8, 1)
+    _vld2_s8(a as *const i8, 1)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0")]
-        fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8")]
+        fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
     }
-    _vld4q_dup_s8(a as *const i8, 1)
+    _vld2q_s8(a as *const i8, 1)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0")]
-        fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16")]
+        fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
     }
-    _vld4_dup_s16(a as *const i8, 2)
+    _vld2_s16(a as *const i8, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0")]
-        fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16")]
+        fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
     }
-    _vld4q_dup_s16(a as *const i8, 2)
+    _vld2q_s16(a as *const i8, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0")]
-        fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32")]
+        fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
     }
-    _vld4_dup_s32(a as *const i8, 4)
+    _vld2_s32(a as *const i8, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0")]
-        fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32")]
+        fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
     }
-    _vld4q_dup_s32(a as *const i8, 4)
+    _vld2q_s32(a as *const i8, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v2f32.p0.p0"
+            link_name = "llvm.aarch64.neon.ld2.v2f32.p0"
         )]
-        fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t;
+        fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t;
     }
-    _vld4_dup_f32(a as _)
+    _vld2_f32(a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4f32.p0.p0"
+            link_name = "llvm.aarch64.neon.ld2.v4f32.p0"
         )]
-        fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t;
+        fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t;
     }
-    _vld4q_dup_f32(a as _)
+    _vld2q_f32(a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v8i8.p0.p0"
+            link_name = "llvm.aarch64.neon.ld2.v8i8.p0"
         )]
-        fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t;
+        fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t;
     }
-    _vld4_dup_s8(a as _)
+    _vld2_s8(a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v16i8.p0.p0"
+            link_name = "llvm.aarch64.neon.ld2.v16i8.p0"
         )]
-        fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t;
+        fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t;
     }
-    _vld4q_dup_s8(a as _)
+    _vld2q_s8(a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4i16.p0.p0"
+            link_name = "llvm.aarch64.neon.ld2.v4i16.p0"
         )]
-        fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t;
+        fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t;
     }
-    _vld4_dup_s16(a as _)
+    _vld2_s16(a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v8i16.p0.p0"
+            link_name = "llvm.aarch64.neon.ld2.v8i16.p0"
         )]
-        fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t;
+        fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t;
     }
-    _vld4q_dup_s16(a as _)
+    _vld2q_s16(a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v2i32.p0.p0"
+            link_name = "llvm.aarch64.neon.ld2.v2i32.p0"
         )]
-        fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t;
+        fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t;
     }
-    _vld4_dup_s32(a as _)
+    _vld2_s32(a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4i32.p0.p0"
+            link_name = "llvm.aarch64.neon.ld2.v4i32.p0"
         )]
-        fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t;
+        fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t;
     }
-    _vld4q_dup_s32(a as _)
+    _vld2q_s32(a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v1i64.p0.p0"
-        )]
-        fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")]
+        fn _vld2_lane_f16(
+            ptr: *const f16,
+            a: float16x4_t,
+            b: float16x4_t,
+            n: i32,
+            size: i32,
+        ) -> float16x4x2_t;
     }
-    _vld4_dup_s64(a as _)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t {
-    transmute(vld4_dup_s64(transmute(a)))
+    _vld2_lane_f16(a as _, b.0, b.1, LANE, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(nop))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0")]
-        fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")]
+        fn _vld2q_lane_f16(
+            ptr: *const f16,
+            a: float16x8_t,
+            b: float16x8_t,
+            n: i32,
+            size: i32,
+        ) -> float16x8x2_t;
     }
-    _vld4_dup_s64(a as *const i8, 8)
+    _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld2, LANE = 0)
 )]
-pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t {
-    transmute(vld4_dup_s64(transmute(a)))
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0"
+        )]
+        fn _vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16)
+            -> float16x4x2_t;
+    }
+    _vld2_lane_f16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld2, LANE = 0)
 )]
-pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
-    transmute(vld4_dup_s8(transmute(a)))
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0"
+        )]
+        fn _vld2q_lane_f16(
+            a: float16x8_t,
+            b: float16x8_t,
+            n: i64,
+            ptr: *const f16,
+        ) -> float16x8x2_t;
+    }
+    _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
-    transmute(vld4q_dup_s8(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0"
+        )]
+        fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t;
+    }
+    _vld2_lane_f32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
-    transmute(vld4_dup_s16(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0"
+        )]
+        fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8)
+            -> float32x4x2_t;
+    }
+    _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
-    transmute(vld4q_dup_s16(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0"
+        )]
+        fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t;
+    }
+    _vld2_lane_s8(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
-    transmute(vld4_dup_s32(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0"
+        )]
+        fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t;
+    }
+    _vld2_lane_s16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
-    transmute(vld4q_dup_s32(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0"
+        )]
+        fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t;
+    }
+    _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
-    transmute(vld4_dup_s8(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0"
+        )]
+        fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t;
+    }
+    _vld2_lane_s32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
-    transmute(vld4q_dup_s8(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0"
+        )]
+        fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t;
+    }
+    _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
-    transmute(vld4_dup_s16(transmute(a)))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0")]
+        fn _vld2_lane_f32(
+            ptr: *const i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            n: i32,
+            size: i32,
+        ) -> float32x2x2_t;
+    }
+    _vld2_lane_f32(a as _, b.0, b.1, LANE, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
-    transmute(vld4q_dup_s16(transmute(a)))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0")]
+        fn _vld2q_lane_f32(
+            ptr: *const i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            n: i32,
+            size: i32,
+        ) -> float32x4x2_t;
+    }
+    _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0")]
-        fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0")]
+        fn _vld2q_lane_s16(
+            ptr: *const i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            n: i32,
+            size: i32,
+        ) -> int16x8x2_t;
     }
-    _vld4_f16(a as _, 2)
+    _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0")]
-        fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0")]
+        fn _vld2q_lane_s32(
+            ptr: *const i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            n: i32,
+            size: i32,
+        ) -> int32x4x2_t;
     }
-    _vld4q_f16(a as _, 2)
+    _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
-    crate::core_arch::macros::deinterleaving_load!(f16, 4, 4, a)
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0")]
+        fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32)
+            -> int8x8x2_t;
+    }
+    _vld2_lane_s8(a as _, b.0, b.1, LANE, 1)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
-    crate::core_arch::macros::deinterleaving_load!(f16, 8, 4, a)
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0")]
+        fn _vld2_lane_s16(
+            ptr: *const i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            n: i32,
+            size: i32,
+        ) -> int16x4x2_t;
+    }
+    _vld2_lane_s16(a as _, b.0, b.1, LANE, 2)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0")]
+        fn _vld2_lane_s32(
+            ptr: *const i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            n: i32,
+            size: i32,
+        ) -> int32x2x2_t;
+    }
+    _vld2_lane_s32(a as _, b.0, b.1, LANE, 4)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
-    crate::core_arch::macros::deinterleaving_load!(f32, 2, 4, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
-    crate::core_arch::macros::deinterleaving_load!(f32, 4, 4, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
-    crate::core_arch::macros::deinterleaving_load!(i8, 8, 4, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
-    crate::core_arch::macros::deinterleaving_load!(i8, 16, 4, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld2_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
-    crate::core_arch::macros::deinterleaving_load!(i16, 4, 4, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld2q_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
-    crate::core_arch::macros::deinterleaving_load!(i16, 8, 4, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
-    crate::core_arch::macros::deinterleaving_load!(i32, 2, 4, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
-    crate::core_arch::macros::deinterleaving_load!(i32, 4, 4, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0")]
-        fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
-    }
-    _vld4_f32(a as *const i8, 4)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t {
+    transmute(vld2_s64(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0")]
-        fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64")]
+        fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
     }
-    _vld4q_f32(a as *const i8, 4)
+    _vld2_s64(a as *const i8, 8)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0")]
-        fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v1i64.p0"
+        )]
+        fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t;
     }
-    _vld4_s8(a as *const i8, 1)
+    _vld2_s64(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0")]
-        fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
-    }
-    _vld4q_s8(a as *const i8, 1)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t {
+    transmute(vld2_s64(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0")]
-        fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
-    }
-    _vld4_s16(a as *const i8, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t {
+    transmute(vld2_s8(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0")]
-        fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
-    }
-    _vld4q_s16(a as *const i8, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
+    transmute(vld2q_s8(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0")]
-        fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
-    }
-    _vld4_s32(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t {
+    transmute(vld2_s16(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0")]
-        fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
-    }
-    _vld4q_s32(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
+    transmute(vld2q_s16(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t {
+    transmute(vld2_s32(transmute(a)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
+    transmute(vld2q_s32(transmute(a)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
+    transmute(vld2_s8(transmute(a)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
+    transmute(vld2q_s8(transmute(a)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
+    transmute(vld2_s16(transmute(a)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t {
+    transmute(vld2q_s16(transmute(a)))
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) -> float16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")]
-        fn _vld4_lane_f16(
-            ptr: *const f16,
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            d: float16x4_t,
-            n: i32,
-            size: i32,
-        ) -> float16x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0")]
+        fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t;
     }
-    _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
+    _vld3_dup_f16(a as _, 2)
 }
-#[doc = "Load multiple 4-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -> float16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8f16.p0")]
-        fn _vld4q_lane_f16(
-            ptr: *const f16,
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            d: float16x8_t,
-            n: i32,
-            size: i32,
-        ) -> float16x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0")]
+        fn _vld3q_dup_f16(ptr: *const f16, size: i32) -> float16x8x3_t;
     }
-    _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
+    _vld3q_dup_f16(a as _, 2)
 }
-#[doc = "Load multiple 4-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) -> float16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v4f16.p0"
         )]
-        fn _vld4_lane_f16(
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            d: float16x4_t,
-            n: i64,
-            ptr: *const f16,
-        ) -> float16x4x4_t;
+        fn _vld3_dup_f16(ptr: *const f16) -> float16x4x3_t;
     }
-    _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3_dup_f16(a as _)
 }
-#[doc = "Load multiple 4-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -> float16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v8f16.p0"
         )]
-        fn _vld4q_lane_f16(
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            d: float16x8_t,
-            n: i64,
-            ptr: *const f16,
-        ) -> float16x8x4_t;
+        fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t;
     }
-    _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3q_dup_f16(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v2f32.p0"
         )]
-        fn _vld4_lane_f32(
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x2x4_t;
+        fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t;
     }
-    _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3_dup_f32(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v4f32.p0"
         )]
-        fn _vld4q_lane_f32(
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x4x4_t;
+        fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t;
     }
-    _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3q_dup_f32(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v8i8.p0"
         )]
-        fn _vld4_lane_s8(
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            d: int8x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int8x8x4_t;
+        fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t;
     }
-    _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3_dup_s8(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v16i8.p0"
         )]
-        fn _vld4_lane_s16(
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            d: int16x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x4x4_t;
+        fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t;
     }
-    _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3q_dup_s8(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v4i16.p0"
         )]
-        fn _vld4q_lane_s16(
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            d: int16x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x8x4_t;
+        fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t;
     }
-    _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3_dup_s16(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v8i16.p0"
         )]
-        fn _vld4_lane_s32(
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            d: int32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x2x4_t;
+        fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t;
     }
-    _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3q_dup_s16(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v2i32.p0"
         )]
-        fn _vld4q_lane_s32(
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            d: int32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x4x4_t;
+        fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t;
     }
-    _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3_dup_s32(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0")]
-        fn _vld4_lane_f32(
-            ptr: *const i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-            n: i32,
-            size: i32,
-        ) -> float32x2x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3r.v4i32.p0"
+        )]
+        fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t;
     }
-    _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+    _vld3q_dup_s32(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3r.v1i64.p0"
+        )]
+        fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t;
+    }
+    _vld3_dup_s64(a as _)
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0")]
-        fn _vld4q_lane_f32(
-            ptr: *const i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-            n: i32,
-            size: i32,
-        ) -> float32x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0")]
+        fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
     }
-    _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+    _vld3_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0")]
-        fn _vld4_lane_s8(
-            ptr: *const i8,
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            d: int8x8_t,
-            n: i32,
-            size: i32,
-        ) -> int8x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0")]
+        fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
     }
-    _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1)
+    _vld3q_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0")]
-        fn _vld4_lane_s16(
-            ptr: *const i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            d: int16x4_t,
-            n: i32,
-            size: i32,
-        ) -> int16x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0")]
+        fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
     }
-    _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
+    _vld3_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0")]
-        fn _vld4q_lane_s16(
-            ptr: *const i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            d: int16x8_t,
-            n: i32,
-            size: i32,
-        ) -> int16x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0")]
+        fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
     }
-    _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
+    _vld3q_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0")]
-        fn _vld4_lane_s32(
-            ptr: *const i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            d: int32x2_t,
-            n: i32,
-            size: i32,
-        ) -> int32x2x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0")]
+        fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
     }
-    _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+    _vld3_dup_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0")]
-        fn _vld4q_lane_s32(
-            ptr: *const i8,
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            d: int32x4_t,
-            n: i32,
-            size: i32,
-        ) -> int32x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0")]
+        fn _vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
     }
-    _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+    _vld3q_dup_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0")]
+        fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
+    }
+    _vld3_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0")]
+        fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
+    }
+    _vld3q_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23262,23 +23959,37 @@ pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t {
+    transmute(vld3_dup_s64(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0")]
+        fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
+    }
+    _vld3_dup_s64(a as *const i8, 8)
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23287,23 +23998,21 @@ pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uin
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t {
+    transmute(vld3_dup_s64(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23312,23 +24021,21 @@ pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
+    transmute(vld3_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23337,23 +24044,21 @@ pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld4_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
+    transmute(vld3q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23362,23 +24067,21 @@ pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld4q_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
+    transmute(vld3_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23387,23 +24090,21 @@ pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
+    transmute(vld3q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23412,23 +24113,21 @@ pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> pol
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
+    transmute(vld3_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23437,21 +24136,20 @@ pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
+    transmute(vld3q_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23461,48 +24159,20 @@ pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t {
-    transmute(vld4_s64(transmute(a)))
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
-    crate::ptr::read_unaligned(a.cast())
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0")]
-        fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
-    }
-    _vld4_s64(a as *const i8, 8)
+pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
+    transmute(vld3_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23512,20 +24182,20 @@ pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t {
-    transmute(vld4_s64(transmute(a)))
+pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
+    transmute(vld3q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23535,20 +24205,20 @@ pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
-    transmute(vld4_s8(transmute(a)))
+pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
+    transmute(vld3_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23558,392 +24228,775 @@ pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t {
-    transmute(vld4q_s8(transmute(a)))
+pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
+    transmute(vld3q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
-    transmute(vld4_s16(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0")]
+        fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t;
+    }
+    _vld3_f16(a as _, 2)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
-    transmute(vld4q_s16(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0")]
+        fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t;
+    }
+    _vld3q_f16(a as _, 2)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld3)
 )]
-pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
-    transmute(vld4_s32(transmute(a)))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
+    crate::core_arch::macros::deinterleaving_load!(f16, 4, 3, a)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld3)
 )]
-pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
-    transmute(vld4q_s32(transmute(a)))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
+    crate::core_arch::macros::deinterleaving_load!(f16, 8, 3, a)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t {
-    transmute(vld4_s8(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
+    crate::core_arch::macros::deinterleaving_load!(f32, 2, 3, a)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t {
-    transmute(vld4q_s8(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
+    crate::core_arch::macros::deinterleaving_load!(f32, 4, 3, a)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t {
-    transmute(vld4_s16(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i8, 8, 3, a)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t {
-    transmute(vld4q_s16(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i8, 16, 3, a)
 }
-#[doc = "Store SIMD&FP register (immediate offset)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldrq_p128)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vldrq_p128(a: *const p128) -> p128 {
-    *a
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i16, 4, 3, a)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmax)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i16, 8, 3, a)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i32, 2, 3, a)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i32, 4, 3, a)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0")]
+        fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
+    }
+    _vld3_f32(a as *const i8, 4)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0")]
+        fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
+    }
+    _vld3q_f32(a as *const i8, 4)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0")]
+        fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
+    }
+    _vld3_s8(a as *const i8, 1)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0")]
+        fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
+    }
+    _vld3q_s8(a as *const i8, 1)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0")]
+        fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
+    }
+    _vld3_s16(a as *const i8, 2)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0")]
+        fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
+    }
+    _vld3q_s16(a as *const i8, 2)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0")]
+        fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
+    }
+    _vld3_s32(a as *const i8, 4)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0")]
+        fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
+    }
+    _vld3q_s32(a as *const i8, 4)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) -> float16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v4f16"
-        )]
-        fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")]
+        fn _vld3_lane_f16(
+            ptr: *const f16,
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            n: i32,
+            size: i32,
+        ) -> float16x4x3_t;
     }
-    unsafe { _vmax_f16(a, b) }
+    _vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -> float16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")]
+        fn _vld3q_lane_f16(
+            ptr: *const f16,
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            n: i32,
+            size: i32,
+        ) -> float16x8x3_t;
+    }
+    _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmax)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) -> float16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v8f16"
+            link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0"
         )]
-        fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+        fn _vld3_lane_f16(
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            n: i64,
+            ptr: *const f16,
+        ) -> float16x4x3_t;
     }
-    unsafe { _vmaxq_f16(a, b) }
+    _vld3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld3, LANE = 0)
 )]
-pub fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -> float16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v2f32"
+            link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0"
         )]
-        fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vld3q_lane_f16(
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            n: i64,
+            ptr: *const f16,
+        ) -> float16x8x3_t;
     }
-    unsafe { _vmax_f32(a, b) }
+    _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v4f32"
+            link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0"
         )]
-        fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        fn _vld3_lane_f32(
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x2x3_t;
     }
-    unsafe { _vmaxq_f32(a, b) }
+    _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe {
-        let mask: int8x8_t = simd_ge(a, b);
-        simd_select(mask, a, b)
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0"
+        )]
+        fn _vld3q_lane_f32(
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x4x3_t;
     }
+    _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0")]
+        fn _vld3_lane_f32(
+            ptr: *const i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            n: i32,
+            size: i32,
+        ) -> float32x2x3_t;
+    }
+    _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe {
-        let mask: int8x16_t = simd_ge(a, b);
-        simd_select(mask, a, b)
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0"
+        )]
+        fn _vld3_lane_s8(
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int8x8x3_t;
     }
+    _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0"
+        )]
+        fn _vld3_lane_s16(
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x4x3_t;
+    }
+    _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
+    static_assert_uimm_bits!(LANE, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0"
+        )]
+        fn _vld3q_lane_s16(
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x8x3_t;
+    }
+    _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0"
+        )]
+        fn _vld3_lane_s32(
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x2x3_t;
+    }
+    _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0"
+        )]
+        fn _vld3q_lane_s32(
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x4x3_t;
+    }
+    _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0")]
+        fn _vld3_lane_s8(
+            ptr: *const i8,
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            n: i32,
+            size: i32,
+        ) -> int8x8x3_t;
+    }
+    _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0")]
+        fn _vld3_lane_s16(
+            ptr: *const i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            n: i32,
+            size: i32,
+        ) -> int16x4x3_t;
+    }
+    _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0")]
+        fn _vld3q_lane_s16(
+            ptr: *const i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            n: i32,
+            size: i32,
+        ) -> int16x8x3_t;
+    }
+    _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0")]
+        fn _vld3_lane_s32(
+            ptr: *const i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            n: i32,
+            size: i32,
+        ) -> int32x2x3_t;
+    }
+    _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0")]
+        fn _vld3q_lane_s32(
+            ptr: *const i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            n: i32,
+            size: i32,
+        ) -> int32x4x3_t;
+    }
+    _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23952,22 +25005,23 @@ pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe {
-        let mask: int16x4_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23976,22 +25030,23 @@ pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe {
-        let mask: int16x8_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24000,22 +25055,23 @@ pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe {
-        let mask: int32x2_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24024,22 +25080,23 @@ pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe {
-        let mask: int32x4_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld3_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24048,22 +25105,23 @@ pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let mask: uint8x8_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld3q_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24072,22 +25130,23 @@ pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe {
-        let mask: uint8x16_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24096,22 +25155,23 @@ pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe {
-        let mask: uint16x4_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24120,21 +25180,21 @@ pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe {
-        let mask: uint16x8_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24144,105 +25204,48 @@ pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe {
-        let mask: uint32x2_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t {
+    transmute(vld3_s64(transmute(a)))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe {
-        let mask: uint32x4_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
-}
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxnm)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnm.v4f16"
-        )]
-        fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vmaxnm_f16(a, b) }
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxnm)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnm.v8f16"
-        )]
-        fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0")]
+        fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
     }
-    unsafe { _vmaxnmq_f16(a, b) }
+    _vld3_s64(a as *const i8, 8)
 }
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxnm)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24252,26 +25255,20 @@ pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnm.v2f32"
-        )]
-        fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vmaxnm_f32(a, b) }
+pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t {
+    transmute(vld3_s64(transmute(a)))
 }
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxnm)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24281,86 +25278,20 @@ pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnm.v4f32"
-        )]
-        fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vmaxnmq_f32(a, b) }
-}
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmin)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v4f16"
-        )]
-        fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vmin_f16(a, b) }
-}
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmin)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v8f16"
-        )]
-        fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
-    }
-    unsafe { _vminq_f16(a, b) }
+pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t {
+    transmute(vld3_s8(transmute(a)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmin)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24370,26 +25301,20 @@ pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v2f32"
-        )]
-        fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vmin_f32(a, b) }
+pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t {
+    transmute(vld3q_s8(transmute(a)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmin)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24399,26 +25324,20 @@ pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v4f32"
-        )]
-        fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vminq_f32(a, b) }
+pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t {
+    transmute(vld3_s16(transmute(a)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24428,21 +25347,20 @@ pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe {
-        let mask: int8x8_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t {
+    transmute(vld3q_s16(transmute(a)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24452,21 +25370,20 @@ pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe {
-        let mask: int8x16_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t {
+    transmute(vld3_s32(transmute(a)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24476,21 +25393,20 @@ pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe {
-        let mask: int16x4_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t {
+    transmute(vld3q_s32(transmute(a)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24500,21 +25416,20 @@ pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe {
-        let mask: int16x8_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t {
+    transmute(vld3_s8(transmute(a)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24524,21 +25439,20 @@ pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe {
-        let mask: int32x2_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t {
+    transmute(vld3q_s8(transmute(a)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24548,21 +25462,20 @@ pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe {
-        let mask: int32x4_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t {
+    transmute(vld3_s16(transmute(a)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24572,230 +25485,426 @@ pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let mask: uint8x8_t = simd_le(a, b);
-        simd_select(mask, a, b)
+pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t {
+    transmute(vld3q_s16(transmute(a)))
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0")]
+        fn _vld3q_lane_f32(
+            ptr: *const i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            n: i32,
+            size: i32,
+        ) -> float32x4x3_t;
     }
+    _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe {
-        let mask: uint8x16_t = simd_le(a, b);
-        simd_select(mask, a, b)
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0")]
+        fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t;
     }
+    _vld4_dup_f16(a as _, 2)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe {
-        let mask: uint16x4_t = simd_le(a, b);
-        simd_select(mask, a, b)
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0")]
+        fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t;
     }
+    _vld4q_dup_f16(a as _, 2)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld4r)
 )]
-pub fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe {
-        let mask: uint16x8_t = simd_le(a, b);
-        simd_select(mask, a, b)
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v4f16.p0"
+        )]
+        fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t;
     }
+    _vld4_dup_f16(a as _)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld4r)
 )]
-pub fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe {
-        let mask: uint32x2_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v8f16.p0"
+        )]
+        fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t;
+    }
+    _vld4q_dup_f16(a as _)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0")]
+        fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
+    }
+    _vld4_dup_f32(a as *const i8, 4)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0")]
+        fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
+    }
+    _vld4q_dup_f32(a as *const i8, 4)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0")]
+        fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
+    }
+    _vld4_dup_s8(a as *const i8, 1)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0")]
+        fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
+    }
+    _vld4q_dup_s8(a as *const i8, 1)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0")]
+        fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
+    }
+    _vld4_dup_s16(a as *const i8, 2)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0")]
+        fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
+    }
+    _vld4q_dup_s16(a as *const i8, 2)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0")]
+        fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
+    }
+    _vld4_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0")]
+        fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
+    }
+    _vld4q_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe {
-        let mask: uint32x4_t = simd_le(a, b);
-        simd_select(mask, a, b)
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v2f32.p0.p0"
+        )]
+        fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t;
     }
+    _vld4_dup_f32(a as _)
 }
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminnm)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnm.v4f16"
+            link_name = "llvm.aarch64.neon.ld4r.v4f32.p0.p0"
         )]
-        fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+        fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t;
     }
-    unsafe { _vminnm_f16(a, b) }
+    _vld4q_dup_f32(a as _)
 }
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminnm)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnm.v8f16"
+            link_name = "llvm.aarch64.neon.ld4r.v8i8.p0.p0"
         )]
-        fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+        fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t;
     }
-    unsafe { _vminnmq_f16(a, b) }
+    _vld4_dup_s8(a as _)
 }
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminnm)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnm.v2f32"
+            link_name = "llvm.aarch64.neon.ld4r.v16i8.p0.p0"
         )]
-        fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t;
     }
-    unsafe { _vminnm_f32(a, b) }
+    _vld4q_dup_s8(a as _)
 }
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v4i16.p0.p0"
+        )]
+        fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t;
+    }
+    _vld4_dup_s16(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v8i16.p0.p0"
+        )]
+        fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t;
+    }
+    _vld4q_dup_s16(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v2i32.p0.p0"
+        )]
+        fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t;
+    }
+    _vld4_dup_s32(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v4i32.p0.p0"
+        )]
+        fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t;
+    }
+    _vld4q_dup_s32(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v1i64.p0.p0"
+        )]
+        fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t;
+    }
+    _vld4_dup_s64(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminnm)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24805,26 +25914,36 @@ pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t {
+    transmute(vld4_dup_s64(transmute(a)))
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(nop))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnm.v4f32"
-        )]
-        fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0")]
+        fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
     }
-    unsafe { _vminnmq_f32(a, b) }
+    _vld4_dup_s64(a as *const i8, 8)
 }
-#[doc = "Floating-point multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24834,18 +25953,20 @@ pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t {
+    transmute(vld4_dup_s64(transmute(a)))
 }
-#[doc = "Floating-point multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24855,20 +25976,21 @@ pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
+    transmute(vld4_dup_s8(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24877,25 +25999,21 @@ pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_lane_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x2_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmla_f32(a, b, vdup_lane_f32::<LANE>(c))
+pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
+    transmute(vld4q_dup_s8(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24904,25 +26022,21 @@ pub fn vmla_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_laneq_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x4_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_f32(a, b, vdup_laneq_f32::<LANE>(c))
+pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
+    transmute(vld4_dup_s16(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24931,25 +26045,21 @@ pub fn vmla_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_lane_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x2_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlaq_f32(a, b, vdupq_lane_f32::<LANE>(c))
+pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
+    transmute(vld4q_dup_s16(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24958,25 +26068,21 @@ pub fn vmlaq_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_laneq_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x4_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_f32(a, b, vdupq_laneq_f32::<LANE>(c))
+pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
+    transmute(vld4_dup_s32(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24985,21 +26091,21 @@ pub fn vmlaq_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_s16(a, b, vdup_lane_s16::<LANE>(c))
+pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
+    transmute(vld4q_dup_s32(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25008,21 +26114,21 @@ pub fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_u16(a, b, vdup_lane_u16::<LANE>(c))
+pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
+    transmute(vld4_dup_s8(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25031,21 +26137,21 @@ pub fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmla_s16(a, b, vdup_laneq_s16::<LANE>(c))
+pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
+    transmute(vld4q_dup_s8(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25054,21 +26160,21 @@ pub fn vmla_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmla_u16(a, b, vdup_laneq_u16::<LANE>(c))
+pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
+    transmute(vld4_dup_s16(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25077,609 +26183,812 @@ pub fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_s16(a, b, vdupq_lane_s16::<LANE>(c))
+pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
+    transmute(vld4q_dup_s16(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_u16(a, b, vdupq_lane_u16::<LANE>(c))
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0")]
+        fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t;
+    }
+    _vld4_f16(a as _, 2)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlaq_s16(a, b, vdupq_laneq_s16::<LANE>(c))
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0")]
+        fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t;
+    }
+    _vld4q_f16(a as _, 2)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld4)
 )]
-pub fn vmlaq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlaq_u16(a, b, vdupq_laneq_u16::<LANE>(c))
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
+    crate::core_arch::macros::deinterleaving_load!(f16, 4, 4, a)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld4)
 )]
-pub fn vmla_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmla_s32(a, b, vdup_lane_s32::<LANE>(c))
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
+    crate::core_arch::macros::deinterleaving_load!(f16, 8, 4, a)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmla_u32(a, b, vdup_lane_u32::<LANE>(c))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
+    crate::core_arch::macros::deinterleaving_load!(f32, 2, 4, a)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmla_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_s32(a, b, vdup_laneq_s32::<LANE>(c))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
+    crate::core_arch::macros::deinterleaving_load!(f32, 4, 4, a)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_u32(a, b, vdup_laneq_u32::<LANE>(c))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i8, 8, 4, a)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlaq_s32(a, b, vdupq_lane_s32::<LANE>(c))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i8, 16, 4, a)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlaq_u32(a, b, vdupq_lane_u32::<LANE>(c))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i16, 4, 4, a)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_s32(a, b, vdupq_laneq_s32::<LANE>(c))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i16, 8, 4, a)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_u32(a, b, vdupq_laneq_u32::<LANE>(c))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i32, 2, 4, a)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
-    vmla_f32(a, b, vdup_n_f32(c))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i32, 4, 4, a)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
-    vmlaq_f32(a, b, vdupq_n_f32(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0")]
+        fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
+    }
+    _vld4_f32(a as *const i8, 4)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
-    vmla_s16(a, b, vdup_n_s16(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0")]
+        fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
+    }
+    _vld4q_f32(a as *const i8, 4)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
-    vmlaq_s16(a, b, vdupq_n_s16(c))
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0")]
+        fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
+    }
+    _vld4_s8(a as *const i8, 1)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
-    vmla_u16(a, b, vdup_n_u16(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0")]
+        fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
+    }
+    _vld4q_s8(a as *const i8, 1)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
-    vmlaq_u16(a, b, vdupq_n_u16(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0")]
+        fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
+    }
+    _vld4_s16(a as *const i8, 2)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
-    vmla_s32(a, b, vdup_n_s32(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0")]
+        fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
+    }
+    _vld4q_s16(a as *const i8, 2)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
-    vmlaq_s32(a, b, vdupq_n_s32(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0")]
+        fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
+    }
+    _vld4_s32(a as *const i8, 4)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
-    vmla_u32(a, b, vdup_n_u32(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0")]
+        fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
+    }
+    _vld4q_s32(a as *const i8, 4)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"]
+#[doc = "Load multiple 4-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
-    vmlaq_u32(a, b, vdupq_n_u32(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) -> float16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")]
+        fn _vld4_lane_f16(
+            ptr: *const f16,
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            d: float16x4_t,
+            n: i32,
+            size: i32,
+        ) -> float16x4x4_t;
+    }
+    _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"]
+#[doc = "Load multiple 4-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -> float16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8f16.p0")]
+        fn _vld4q_lane_f16(
+            ptr: *const f16,
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            d: float16x8_t,
+            n: i32,
+            size: i32,
+        ) -> float16x8x4_t;
+    }
+    _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"]
+#[doc = "Load multiple 4-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld4, LANE = 0)
 )]
-pub fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) -> float16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0"
+        )]
+        fn _vld4_lane_f16(
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            d: float16x4_t,
+            n: i64,
+            ptr: *const f16,
+        ) -> float16x4x4_t;
+    }
+    _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"]
+#[doc = "Load multiple 4-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld4, LANE = 0)
 )]
-pub fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -> float16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0"
+        )]
+        fn _vld4q_lane_f16(
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            d: float16x8_t,
+            n: i64,
+            ptr: *const f16,
+        ) -> float16x8x4_t;
+    }
+    _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0"
+        )]
+        fn _vld4_lane_f32(
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x2x4_t;
+    }
+    _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0"
+        )]
+        fn _vld4q_lane_f32(
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x4x4_t;
+    }
+    _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0"
+        )]
+        fn _vld4_lane_s8(
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            d: int8x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int8x8x4_t;
+    }
+    _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0"
+        )]
+        fn _vld4_lane_s16(
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            d: int16x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x4x4_t;
+    }
+    _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0"
+        )]
+        fn _vld4q_lane_s16(
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            d: int16x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x8x4_t;
+    }
+    _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0"
+        )]
+        fn _vld4_lane_s32(
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            d: int32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x2x4_t;
+    }
+    _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0"
+        )]
+        fn _vld4q_lane_s32(
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            d: int32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x4x4_t;
+    }
+    _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0")]
+        fn _vld4_lane_f32(
+            ptr: *const i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            n: i32,
+            size: i32,
+        ) -> float32x2x4_t;
+    }
+    _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0")]
+        fn _vld4q_lane_f32(
+            ptr: *const i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            n: i32,
+            size: i32,
+        ) -> float32x4x4_t;
+    }
+    _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0")]
+        fn _vld4_lane_s8(
+            ptr: *const i8,
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            d: int8x8_t,
+            n: i32,
+            size: i32,
+        ) -> int8x8x4_t;
+    }
+    _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0")]
+        fn _vld4_lane_s16(
+            ptr: *const i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            d: int16x4_t,
+            n: i32,
+            size: i32,
+        ) -> int16x4x4_t;
+    }
+    _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0")]
+        fn _vld4q_lane_s16(
+            ptr: *const i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            d: int16x8_t,
+            n: i32,
+            size: i32,
+        ) -> int16x8x4_t;
+    }
+    _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0")]
+        fn _vld4_lane_s32(
+            ptr: *const i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            d: int32x2_t,
+            n: i32,
+            size: i32,
+        ) -> int32x2x4_t;
+    }
+    _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0")]
+        fn _vld4q_lane_s32(
+            ptr: *const i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            d: int32x4_t,
+            n: i32,
+            size: i32,
+        ) -> int32x4x4_t;
+    }
+    _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25688,19 +26997,23 @@ pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25709,19 +27022,23 @@ pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25730,19 +27047,23 @@ pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25751,19 +27072,23 @@ pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld4_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25772,19 +27097,23 @@ pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4q_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25793,20 +27122,23 @@ pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
+    assert_instr(ld4, LANE = 0)
 )]
-#[rustc_legacy_const_generics(3)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25815,21 +27147,23 @@ pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    vmlal_s16(a, b, vdup_lane_s16::<LANE>(c))
+    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
+    assert_instr(ld4, LANE = 0)
 )]
-#[rustc_legacy_const_generics(3)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25838,21 +27172,22 @@ pub fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    vmlal_s16(a, b, vdup_laneq_s16::<LANE>(c))
+    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25861,21 +27196,49 @@ pub fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlal_s32(a, b, vdup_lane_s32::<LANE>(c))
+pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t {
+    transmute(vld4_s64(transmute(a)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
+    crate::ptr::read_unaligned(a.cast())
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0")]
+        fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
+    }
+    _vld4_s64(a as *const i8, 8)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25884,21 +27247,21 @@ pub fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_s32(a, b, vdup_laneq_s32::<LANE>(c))
+pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t {
+    transmute(vld4_s64(transmute(a)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25907,21 +27270,21 @@ pub fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_u16(a, b, vdup_lane_u16::<LANE>(c))
+pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
+    transmute(vld4_s8(transmute(a)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25930,21 +27293,21 @@ pub fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlal_u16(a, b, vdup_laneq_u16::<LANE>(c))
-}
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"]
+pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t {
+    transmute(vld4q_s8(transmute(a)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25953,21 +27316,21 @@ pub fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlal_u32(a, b, vdup_lane_u32::<LANE>(c))
+pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
+    transmute(vld4_s16(transmute(a)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25976,19 +27339,20 @@ pub fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_u32(a, b, vdup_laneq_u32::<LANE>(c))
+pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
+    transmute(vld4q_s16(transmute(a)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25998,18 +27362,20 @@ pub fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vmlal_s16(a, b, vdup_n_s16(c))
+pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
+    transmute(vld4_s32(transmute(a)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26019,18 +27385,20 @@ pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vmlal_s32(a, b, vdup_n_s32(c))
+pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
+    transmute(vld4q_s32(transmute(a)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26040,18 +27408,20 @@ pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
-    vmlal_u16(a, b, vdup_n_u16(c))
+pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t {
+    transmute(vld4_s8(transmute(a)))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26061,18 +27431,20 @@ pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
-    vmlal_u32(a, b, vdup_n_u32(c))
+pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t {
+    transmute(vld4q_s8(transmute(a)))
 }
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26082,18 +27454,20 @@ pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
-    unsafe { simd_add(a, vmull_s8(b, c)) }
+pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t {
+    transmute(vld4_s16(transmute(a)))
 }
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26103,18 +27477,20 @@ pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    unsafe { simd_add(a, vmull_s16(b, c)) }
+pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t {
+    transmute(vld4q_s16(transmute(a)))
 }
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"]
+#[doc = "Store SIMD&FP register (immediate offset)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldrq_p128)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26124,60 +27500,78 @@ pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    unsafe { simd_add(a, vmull_s32(b, c)) }
+pub unsafe fn vldrq_p128(a: *const p128) -> p128 {
+    *a
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(fmax)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    unsafe { simd_add(a, vmull_u8(b, c)) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmax.v4f16"
+        )]
+        fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vmax_f16(a, b) }
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(fmax)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    unsafe { simd_add(a, vmull_u16(b, c)) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmax.v8f16"
+        )]
+        fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vmaxq_f16(a, b) }
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(fmax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26187,18 +27581,26 @@ pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    unsafe { simd_add(a, vmull_u32(b, c)) }
+pub fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmax.v2f32"
+        )]
+        fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vmax_f32(a, b) }
 }
-#[doc = "Floating-point multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(fmax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26208,18 +27610,26 @@ pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmax.v4f32"
+        )]
+        fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vmaxq_f32(a, b) }
 }
-#[doc = "Floating-point multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(smax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26229,20 +27639,22 @@ pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe {
+        let mask: int8x8_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(smax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26251,25 +27663,22 @@ pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_lane_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x2_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmls_f32(a, b, vdup_lane_f32::<LANE>(c))
+pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        let mask: int8x16_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(smax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26278,25 +27687,22 @@ pub fn vmls_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_laneq_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x4_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmls_f32(a, b, vdup_laneq_f32::<LANE>(c))
+pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe {
+        let mask: int16x4_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(smax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26305,25 +27711,22 @@ pub fn vmls_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_lane_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x2_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsq_f32(a, b, vdupq_lane_f32::<LANE>(c))
+pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe {
+        let mask: int16x8_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(smax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26332,25 +27735,22 @@ pub fn vmlsq_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_laneq_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x4_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsq_f32(a, b, vdupq_laneq_f32::<LANE>(c))
+pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe {
+        let mask: int32x2_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(smax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26359,21 +27759,22 @@ pub fn vmlsq_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmls_s16(a, b, vdup_lane_s16::<LANE>(c))
+pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe {
+        let mask: int32x4_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26382,21 +27783,22 @@ pub fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmls_u16(a, b, vdup_lane_u16::<LANE>(c))
+pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let mask: uint8x8_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26405,21 +27807,22 @@ pub fn vmls_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmls_s16(a, b, vdup_laneq_s16::<LANE>(c))
+pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let mask: uint8x16_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26428,21 +27831,22 @@ pub fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmls_u16(a, b, vdup_laneq_u16::<LANE>(c))
+pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let mask: uint16x4_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26451,21 +27855,22 @@ pub fn vmls_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsq_s16(a, b, vdupq_lane_s16::<LANE>(c))
+pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let mask: uint16x8_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26474,21 +27879,22 @@ pub fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsq_u16(a, b, vdupq_lane_u16::<LANE>(c))
+pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe {
+        let mask: uint32x2_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umax)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26497,67 +27903,82 @@ pub fn vmlsq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlsq_s16(a, b, vdupq_laneq_s16::<LANE>(c))
+pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe {
+        let mask: uint32x4_t = simd_ge(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"]
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(fmaxnm)
 )]
-#[rustc_legacy_const_generics(3)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlsq_u16(a, b, vdupq_laneq_u16::<LANE>(c))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v4f16"
+        )]
+        fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vmaxnm_f16(a, b) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"]
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(fmaxnm)
 )]
-#[rustc_legacy_const_generics(3)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmls_s32(a, b, vdup_lane_s32::<LANE>(c))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v8f16"
+        )]
+        fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vmaxnmq_f16(a, b) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"]
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(fmaxnm)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26566,21 +27987,27 @@ pub fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmls_u32(a, b, vdup_lane_u32::<LANE>(c))
+pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v2f32"
+        )]
+        fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vmaxnm_f32(a, b) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"]
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(fmaxnm)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26589,67 +28016,87 @@ pub fn vmls_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmls_s32(a, b, vdup_laneq_s32::<LANE>(c))
+pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v4f32"
+        )]
+        fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vmaxnmq_f32(a, b) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(fmin)
 )]
-#[rustc_legacy_const_generics(3)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmls_u32(a, b, vdup_laneq_u32::<LANE>(c))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmin.v4f16"
+        )]
+        fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vmin_f16(a, b) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(fmin)
 )]
-#[rustc_legacy_const_generics(3)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsq_s32(a, b, vdupq_lane_s32::<LANE>(c))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmin.v8f16"
+        )]
+        fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vminq_f16(a, b) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(fmin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26658,21 +28105,27 @@ pub fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsq_u32(a, b, vdupq_lane_u32::<LANE>(c))
+pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmin.v2f32"
+        )]
+        fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vmin_f32(a, b) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(fmin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26681,21 +28134,27 @@ pub fn vmlsq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsq_s32(a, b, vdupq_laneq_s32::<LANE>(c))
+pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmin.v4f32"
+        )]
+        fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vminq_f32(a, b) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(smin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26704,19 +28163,21 @@ pub fn vmlsq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsq_u32(a, b, vdupq_laneq_u32::<LANE>(c))
+pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe {
+        let mask: int8x8_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(smin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26726,18 +28187,21 @@ pub fn vmlsq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
-    vmls_f32(a, b, vdup_n_f32(c))
+pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        let mask: int8x16_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(smin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26747,18 +28211,21 @@ pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
-    vmlsq_f32(a, b, vdupq_n_f32(c))
+pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe {
+        let mask: int16x4_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26768,18 +28235,21 @@ pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
-    vmls_s16(a, b, vdup_n_s16(c))
+pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe {
+        let mask: int16x8_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26789,18 +28259,21 @@ pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
-    vmlsq_s16(a, b, vdupq_n_s16(c))
+pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe {
+        let mask: int32x2_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26810,18 +28283,21 @@ pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
-    vmls_u16(a, b, vdup_n_u16(c))
+pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe {
+        let mask: int32x4_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26831,18 +28307,21 @@ pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
-    vmlsq_u16(a, b, vdupq_n_u16(c))
+pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let mask: uint8x8_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26852,18 +28331,21 @@ pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
-    vmls_s32(a, b, vdup_n_s32(c))
+pub fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let mask: uint8x16_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26873,18 +28355,21 @@ pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
-    vmlsq_s32(a, b, vdupq_n_s32(c))
+pub fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let mask: uint16x4_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26894,18 +28379,21 @@ pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
-    vmls_u32(a, b, vdup_n_u32(c))
+pub fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let mask: uint16x8_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26915,18 +28403,21 @@ pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
-    vmlsq_u32(a, b, vdupq_n_u32(c))
+pub fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe {
+        let mask: uint32x2_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26936,60 +28427,81 @@ pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe {
+        let mask: uint32x4_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"]
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fminnm)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v4f16"
+        )]
+        fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vminnm_f16(a, b) }
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"]
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fminnm)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v8f16"
+        )]
+        fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vminnmq_f16(a, b) }
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"]
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fminnm)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26999,18 +28511,26 @@ pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v2f32"
+        )]
+        fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vminnm_f32(a, b) }
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"]
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fminnm)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27020,18 +28540,26 @@ pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v4f32"
+        )]
+        fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vminnmq_f32(a, b) }
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"]
+#[doc = "Floating-point multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27041,18 +28569,18 @@ pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"]
+#[doc = "Floating-point multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27062,19 +28590,20 @@ pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27083,19 +28612,25 @@ pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmla_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmla_f32(a, b, vdup_lane_f32::<LANE>(c))
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27104,19 +28639,25 @@ pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmla_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_f32(a, b, vdup_laneq_f32::<LANE>(c))
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27125,19 +28666,25 @@ pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmlaq_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlaq_f32(a, b, vdupq_lane_f32::<LANE>(c))
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27146,19 +28693,25 @@ pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
-}
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"]
+pub fn vmlaq_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_f32(a, b, vdupq_laneq_f32::<LANE>(c))
+}
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27167,18 +28720,19 @@ pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -27189,19 +28743,19 @@ pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+pub fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    vmlsl_s16(a, b, vdup_lane_s16::<LANE>(c))
+    vmla_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -27212,19 +28766,19 @@ pub fn vmlsl_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+pub fn vmla_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    vmlsl_s16(a, b, vdup_laneq_s16::<LANE>(c))
+    vmla_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -27235,19 +28789,19 @@ pub fn vmlsl_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsl_s32(a, b, vdup_lane_s32::<LANE>(c))
+pub fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmla_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -27258,19 +28812,19 @@ pub fn vmlsl_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+pub fn vmlaq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    vmlsl_s32(a, b, vdup_laneq_s32::<LANE>(c))
+    vmlaq_s16(a, b, vdupq_lane_s16::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -27281,19 +28835,19 @@ pub fn vmlsl_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+pub fn vmlaq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    vmlsl_u16(a, b, vdup_lane_u16::<LANE>(c))
+    vmlaq_u16(a, b, vdupq_lane_u16::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -27304,19 +28858,19 @@ pub fn vmlsl_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
+pub fn vmlaq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    vmlsl_u16(a, b, vdup_laneq_u16::<LANE>(c))
+    vmlaq_s16(a, b, vdupq_laneq_s16::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -27327,19 +28881,19 @@ pub fn vmlsl_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsl_u32(a, b, vdup_lane_u32::<LANE>(c))
+pub fn vmlaq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlaq_u16(a, b, vdupq_laneq_u16::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -27350,20 +28904,21 @@ pub fn vmlsl_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsl_u32(a, b, vdup_laneq_u32::<LANE>(c))
+pub fn vmla_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmla_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27372,19 +28927,21 @@ pub fn vmlsl_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vmlsl_s16(a, b, vdup_n_s16(c))
+pub fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmla_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27393,19 +28950,21 @@ pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vmlsl_s32(a, b, vdup_n_s32(c))
+pub fn vmla_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27414,19 +28973,21 @@ pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
-    vmlsl_u16(a, b, vdup_n_u16(c))
+pub fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27435,19 +28996,21 @@ pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
-    vmlsl_u32(a, b, vdup_n_u32(c))
+pub fn vmlaq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlaq_s32(a, b, vdupq_lane_s32::<LANE>(c))
 }
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27456,19 +29019,21 @@ pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
-    unsafe { simd_sub(a, vmull_s8(b, c)) }
+pub fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlaq_u32(a, b, vdupq_lane_u32::<LANE>(c))
 }
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27477,19 +29042,21 @@ pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    unsafe { simd_sub(a, vmull_s16(b, c)) }
+pub fn vmlaq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_s32(a, b, vdupq_laneq_s32::<LANE>(c))
 }
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27498,18 +29065,19 @@ pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    unsafe { simd_sub(a, vmull_s32(b, c)) }
+pub fn vmlaq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_u32(a, b, vdupq_laneq_u32::<LANE>(c))
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27519,18 +29087,18 @@ pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    unsafe { simd_sub(a, vmull_u8(b, c)) }
+pub fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vmla_f32(a, b, vdup_n_f32(c))
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27540,18 +29108,18 @@ pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    unsafe { simd_sub(a, vmull_u16(b, c)) }
+pub fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vmlaq_f32(a, b, vdupq_n_f32(c))
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27561,108 +29129,81 @@ pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    unsafe { simd_sub(a, vmull_u32(b, c)) }
+pub fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
+    vmla_s16(a, b, vdup_n_s16(c))
 }
-#[doc = "8-bit integer matrix multiply-accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smmla)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")]
-        fn _vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
-    }
-    unsafe { _vmmlaq_s32(a, b, c) }
+pub fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
+    vmlaq_s16(a, b, vdupq_n_s16(c))
 }
-#[doc = "8-bit integer matrix multiply-accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ummla)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")]
-        fn _vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t;
-    }
-    unsafe { _vmmlaq_u32(a, b, c) }
+pub fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
+    vmla_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Duplicate element to vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmov_n_f16(a: f16) -> float16x4_t {
-    vdup_n_f16(a)
-}
-#[doc = "Duplicate element to vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmovq_n_f16(a: f16) -> float16x8_t {
-    vdupq_n_f16(a)
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
+    vmlaq_u16(a, b, vdupq_n_u16(c))
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27672,18 +29213,18 @@ pub fn vmovq_n_f16(a: f16) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_f32(value: f32) -> float32x2_t {
-    vdup_n_f32(value)
+pub fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
+    vmla_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27693,18 +29234,18 @@ pub fn vmov_n_f32(value: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_p16(value: p16) -> poly16x4_t {
-    vdup_n_p16(value)
+pub fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
+    vmlaq_s32(a, b, vdupq_n_s32(c))
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p8)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27714,18 +29255,18 @@ pub fn vmov_n_p16(value: p16) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_p8(value: p8) -> poly8x8_t {
-    vdup_n_p8(value)
+pub fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
+    vmla_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27735,18 +29276,18 @@ pub fn vmov_n_p8(value: p8) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_s16(value: i16) -> int16x4_t {
-    vdup_n_s16(value)
+pub fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
+    vmlaq_u32(a, b, vdupq_n_u32(c))
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27756,18 +29297,18 @@ pub fn vmov_n_s16(value: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_s32(value: i32) -> int32x2_t {
-    vdup_n_s32(value)
+pub fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s64)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmov)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27777,18 +29318,18 @@ pub fn vmov_n_s32(value: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_s64(value: i64) -> int64x1_t {
-    vdup_n_s64(value)
+pub fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s8)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27798,18 +29339,18 @@ pub fn vmov_n_s64(value: i64) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_s8(value: i8) -> int8x8_t {
-    vdup_n_s8(value)
+pub fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27819,18 +29360,18 @@ pub fn vmov_n_s8(value: i8) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_u16(value: u16) -> uint16x4_t {
-    vdup_n_u16(value)
+pub fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27840,18 +29381,18 @@ pub fn vmov_n_u16(value: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_u32(value: u32) -> uint32x2_t {
-    vdup_n_u32(value)
+pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u64)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmov)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27861,18 +29402,18 @@ pub fn vmov_n_u32(value: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_u64(value: u64) -> uint64x1_t {
-    vdup_n_u64(value)
+pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u8)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27882,18 +29423,18 @@ pub fn vmov_n_u64(value: u64) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_u8(value: u8) -> uint8x8_t {
-    vdup_n_u8(value)
+pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27903,18 +29444,18 @@ pub fn vmov_n_u8(value: u8) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_f32(value: f32) -> float32x4_t {
-    vdupq_n_f32(value)
+pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27924,18 +29465,18 @@ pub fn vmovq_n_f32(value: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_p16(value: p16) -> poly16x8_t {
-    vdupq_n_p16(value)
+pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p8)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27945,18 +29486,18 @@ pub fn vmovq_n_p16(value: p16) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_p8(value: p8) -> poly8x16_t {
-    vdupq_n_p8(value)
+pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27966,18 +29507,18 @@ pub fn vmovq_n_p8(value: p8) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_s16(value: i16) -> int16x8_t {
-    vdupq_n_s16(value)
+pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27987,19 +29528,20 @@ pub fn vmovq_n_s16(value: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_s32(value: i32) -> int32x4_t {
-    vdupq_n_s32(value)
+pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s64)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28008,19 +29550,21 @@ pub fn vmovq_n_s32(value: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_s64(value: i64) -> int64x2_t {
-    vdupq_n_s64(value)
+pub fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s8)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28029,19 +29573,21 @@ pub fn vmovq_n_s64(value: i64) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_s8(value: i8) -> int8x16_t {
-    vdupq_n_s8(value)
+pub fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlal_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u16)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28050,19 +29596,21 @@ pub fn vmovq_n_s8(value: i8) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_u16(value: u16) -> uint16x8_t {
-    vdupq_n_u16(value)
+pub fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlal_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28071,19 +29619,21 @@ pub fn vmovq_n_u16(value: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_u32(value: u32) -> uint32x4_t {
-    vdupq_n_u32(value)
+pub fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u64)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(umlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28092,19 +29642,21 @@ pub fn vmovq_n_u32(value: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_u64(value: u64) -> uint64x2_t {
-    vdupq_n_u64(value)
+pub fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u8)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(umlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28113,19 +29665,21 @@ pub fn vmovq_n_u64(value: u64) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_u8(value: u8) -> uint8x16_t {
-    vdupq_n_u8(value)
+pub fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlal_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s16)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sxtl)
+    assert_instr(umlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28134,19 +29688,21 @@ pub fn vmovq_n_u8(value: u8) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_s16(a: int16x4_t) -> int32x4_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlal_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sxtl)
+    assert_instr(umlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28155,18 +29711,19 @@ pub fn vmovl_s16(a: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_s32(a: int32x2_t) -> int64x2_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s8)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sxtl)
+    assert_instr(smlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28176,18 +29733,18 @@ pub fn vmovl_s32(a: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_s8(a: int8x8_t) -> int16x8_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vmlal_s16(a, b, vdup_n_s16(c))
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u16)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uxtl)
+    assert_instr(smlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28197,18 +29754,18 @@ pub fn vmovl_s8(a: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vmlal_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uxtl)
+    assert_instr(umlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28218,18 +29775,18 @@ pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
+    vmlal_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u8)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uxtl)
+    assert_instr(umlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28239,18 +29796,18 @@ pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
+    vmlal_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s16)"]
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(smlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28260,18 +29817,18 @@ pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_s16(a: int16x8_t) -> int8x8_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+    unsafe { simd_add(a, vmull_s8(b, c)) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s32)"]
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(smlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28281,18 +29838,18 @@ pub fn vmovn_s16(a: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_s32(a: int32x4_t) -> int16x4_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    unsafe { simd_add(a, vmull_s16(b, c)) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s64)"]
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(smlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28302,18 +29859,18 @@ pub fn vmovn_s32(a: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_s64(a: int64x2_t) -> int32x2_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    unsafe { simd_add(a, vmull_s32(b, c)) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u16)"]
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(umlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28323,18 +29880,18 @@ pub fn vmovn_s64(a: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    unsafe { simd_add(a, vmull_u8(b, c)) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u32)"]
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(umlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28344,18 +29901,18 @@ pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    unsafe { simd_add(a, vmull_u16(b, c)) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u64)"]
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(umlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28365,63 +29922,62 @@ pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_u64(a: uint64x2_t) -> uint32x2_t {
-    unsafe { simd_cast(a) }
+pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    unsafe { simd_add(a, vmull_u32(b, c)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"]
+#[doc = "Floating-point multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(fmul)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"]
+#[doc = "Floating-point multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(fmul)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28430,19 +29986,25 @@ pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmls_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmls_f32(a, b, vdup_lane_f32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28451,68 +30013,79 @@ pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmls_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_f32(a, b, vdup_laneq_f32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(fmul, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmul_lane_f16<const LANE: i32>(a: float16x4_t, v: float16x4_t) -> float16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdup_lane_f16::<LANE>(v)) }
+pub fn vmlsq_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsq_f32(a, b, vdupq_lane_f32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(fmul, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float16x8_t {
+pub fn vmlsq_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdupq_lane_f16::<LANE>(v)) }
+    vmlsq_f32(a, b, vdupq_laneq_f32::<LANE>(c))
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28521,21 +30094,21 @@ pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float1
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, vdup_lane_f32::<LANE>(b)) }
+pub fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28544,21 +30117,21 @@ pub fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
+pub fn vmls_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdup_laneq_f32::<LANE>(b)) }
+    vmls_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28567,21 +30140,21 @@ pub fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, vdupq_lane_f32::<LANE>(b)) }
+pub fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmls_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28590,21 +30163,21 @@ pub fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdupq_laneq_f32::<LANE>(b)) }
+pub fn vmls_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmls_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28613,21 +30186,21 @@ pub fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+pub fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdup_lane_s16::<LANE>(b)) }
+    vmlsq_s16(a, b, vdupq_lane_s16::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28636,21 +30209,21 @@ pub fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+pub fn vmlsq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdupq_lane_s16::<LANE>(b)) }
+    vmlsq_u16(a, b, vdupq_lane_u16::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28659,21 +30232,21 @@ pub fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, vdup_lane_s32::<LANE>(b)) }
+pub fn vmlsq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsq_s16(a, b, vdupq_laneq_s16::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28682,21 +30255,21 @@ pub fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, vdupq_lane_s32::<LANE>(b)) }
+pub fn vmlsq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsq_u16(a, b, vdupq_laneq_u16::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28705,21 +30278,21 @@ pub fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdup_lane_u16::<LANE>(b)) }
+pub fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmls_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28728,21 +30301,21 @@ pub fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdupq_lane_u16::<LANE>(b)) }
+pub fn vmls_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmls_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28751,21 +30324,21 @@ pub fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, vdup_lane_u32::<LANE>(b)) }
+pub fn vmls_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28774,21 +30347,21 @@ pub fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, vdupq_lane_u32::<LANE>(b)) }
+pub fn vmls_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28797,21 +30370,21 @@ pub fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_mul(a, vdup_laneq_s16::<LANE>(b)) }
+pub fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsq_s32(a, b, vdupq_lane_s32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28820,21 +30393,21 @@ pub fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_mul(a, vdupq_laneq_s16::<LANE>(b)) }
+pub fn vmlsq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsq_u32(a, b, vdupq_lane_u32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28843,21 +30416,21 @@ pub fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+pub fn vmlsq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdup_laneq_s32::<LANE>(b)) }
+    vmlsq_s32(a, b, vdupq_laneq_s32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28866,21 +30439,20 @@ pub fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+pub fn vmlsq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdupq_laneq_s32::<LANE>(b)) }
+    vmlsq_u32(a, b, vdupq_laneq_u32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(fmul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28889,21 +30461,19 @@ pub fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_mul(a, vdup_laneq_u16::<LANE>(b)) }
+pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vmls_f32(a, b, vdup_n_f32(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(fmul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28912,21 +30482,19 @@ pub fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_mul(a, vdupq_laneq_u16::<LANE>(b)) }
+pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vmlsq_f32(a, b, vdupq_n_f32(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28935,21 +30503,19 @@ pub fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdup_laneq_u32::<LANE>(b)) }
+pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
+    vmls_s16(a, b, vdup_n_s16(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mls)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28958,49 +30524,18 @@ pub fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, vdupq_laneq_u32::<LANE>(b)) }
-}
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t {
-    unsafe { simd_mul(a, vdup_n_f16(b)) }
-}
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t {
-    unsafe { simd_mul(a, vdupq_n_f16(b)) }
+pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
+    vmlsq_s16(a, b, vdupq_n_s16(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29010,18 +30545,18 @@ pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
-    unsafe { simd_mul(a, vdup_n_f32(b)) }
+pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
+    vmls_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29031,18 +30566,18 @@ pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
-    unsafe { simd_mul(a, vdupq_n_f32(b)) }
+pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
+    vmlsq_u16(a, b, vdupq_n_u16(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29052,18 +30587,18 @@ pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    unsafe { simd_mul(a, vdup_n_s16(b)) }
+pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
+    vmls_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29073,18 +30608,18 @@ pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    unsafe { simd_mul(a, vdupq_n_s16(b)) }
+pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
+    vmlsq_s32(a, b, vdupq_n_s32(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29094,18 +30629,18 @@ pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    unsafe { simd_mul(a, vdup_n_s32(b)) }
+pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
+    vmls_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29115,18 +30650,18 @@ pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    unsafe { simd_mul(a, vdupq_n_s32(b)) }
+pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
+    vmlsq_u32(a, b, vdupq_n_u32(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29136,18 +30671,18 @@ pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
-    unsafe { simd_mul(a, vdup_n_u16(b)) }
+pub fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29157,18 +30692,18 @@ pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
-    unsafe { simd_mul(a, vdupq_n_u16(b)) }
+pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29178,18 +30713,18 @@ pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
-    unsafe { simd_mul(a, vdup_n_u32(b)) }
+pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29199,18 +30734,18 @@ pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
-    unsafe { simd_mul(a, vdupq_n_u32(b)) }
+pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Polynomial multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(pmul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29220,26 +30755,18 @@ pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.pmul.v8i8"
-        )]
-        fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t;
-    }
-    unsafe { _vmul_p8(a, b) }
+pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Polynomial multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(pmul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29249,26 +30776,18 @@ pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.pmul.v16i8"
-        )]
-        fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t;
-    }
-    unsafe { _vmulq_p8(a, b) }
+pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29278,18 +30797,18 @@ pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29299,18 +30818,18 @@ pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29320,18 +30839,18 @@ pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29341,18 +30860,18 @@ pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29362,18 +30881,18 @@ pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29383,19 +30902,20 @@ pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29404,19 +30924,21 @@ pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmlsl_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29425,19 +30947,21 @@ pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmlsl_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsl_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29446,19 +30970,21 @@ pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmlsl_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsl_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29467,19 +30993,21 @@ pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmlsl_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(umlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29488,19 +31016,21 @@ pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmlsl_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(umlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29509,20 +31039,21 @@ pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_mul(a, b) }
+pub fn vmlsl_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsl_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(umlsl, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29531,21 +31062,21 @@ pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_s16(a, vdup_lane_s16::<LANE>(b))
+pub fn vmlsl_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsl_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(umlsl, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29554,21 +31085,20 @@ pub fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmull_s16(a, vdup_laneq_s16::<LANE>(b))
+pub fn vmlsl_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29577,21 +31107,19 @@ pub fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmull_s32(a, vdup_lane_s32::<LANE>(b))
+pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vmlsl_s16(a, b, vdup_n_s16(c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29600,21 +31128,19 @@ pub fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_s32(a, vdup_laneq_s32::<LANE>(b))
+pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vmlsl_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(umlsl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29623,21 +31149,19 @@ pub fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_u16(a, vdup_lane_u16::<LANE>(b))
+pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
+    vmlsl_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(umlsl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29646,21 +31170,19 @@ pub fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmull_u16(a, vdup_laneq_u16::<LANE>(b))
+pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
+    vmlsl_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29669,21 +31191,19 @@ pub fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmull_u32(a, vdup_lane_u32::<LANE>(b))
+pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+    unsafe { simd_sub(a, vmull_s8(b, c)) }
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29692,19 +31212,18 @@ pub fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_u32(a, vdup_laneq_u32::<LANE>(b))
+pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    unsafe { simd_sub(a, vmull_s16(b, c)) }
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(smlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29714,18 +31233,18 @@ pub fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
-    vmull_s16(a, vdup_n_s16(b))
+pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    unsafe { simd_sub(a, vmull_s32(b, c)) }
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(umlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29735,18 +31254,18 @@ pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
-    vmull_s32(a, vdup_n_s32(b))
+pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    unsafe { simd_sub(a, vmull_u8(b, c)) }
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(umlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29756,18 +31275,18 @@ pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
-    vmull_u16(a, vdup_n_u16(b))
+pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    unsafe { simd_sub(a, vmull_u16(b, c)) }
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(umlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29777,89 +31296,108 @@ pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
-    vmull_u32(a, vdup_n_u32(b))
+pub fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    unsafe { simd_sub(a, vmull_u32(b, c)) }
 }
-#[doc = "Polynomial multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"]
+#[doc = "8-bit integer matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_s32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(pmull)
+    assert_instr(smmla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
+pub fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.pmull.v8i16"
+            link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i16")]
-        fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")]
+        fn _vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
     }
-    unsafe { _vmull_p8(a, b) }
+    unsafe { _vmmlaq_s32(a, b, c) }
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"]
+#[doc = "8-bit integer matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(ummla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
+pub fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")]
+        fn _vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t;
+    }
+    unsafe { _vmmlaq_u32(a, b, c) }
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"]
+#[doc = "Duplicate element to vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    assert_instr(dup)
 )]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmov_n_f16(a: f16) -> float16x4_t {
+    vdup_n_f16(a)
+}
+#[doc = "Duplicate element to vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
 )]
-pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmovq_n_f16(a: f16) -> float16x8_t {
+    vdupq_n_f16(a)
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29869,18 +31407,18 @@ pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
-    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
+pub fn vmov_n_f32(value: f32) -> float32x2_t {
+    vdup_n_f32(value)
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29890,18 +31428,18 @@ pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
-    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
+pub fn vmov_n_p16(value: p16) -> poly16x4_t {
+    vdup_n_p16(value)
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29911,18 +31449,18 @@ pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
-    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
+pub fn vmov_n_p8(value: p8) -> poly8x8_t {
+    vdup_n_p8(value)
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29932,18 +31470,18 @@ pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
-    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
+pub fn vmov_n_s16(value: i16) -> int16x4_t {
+    vdup_n_s16(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_p8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29953,19 +31491,18 @@ pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
-    let b = poly8x8_t::splat(255);
-    unsafe { simd_xor(a, b) }
+pub fn vmov_n_s32(value: i32) -> int32x2_t {
+    vdup_n_s32(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(fmov)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29975,19 +31512,18 @@ pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_s16(a: int16x4_t) -> int16x4_t {
-    let b = int16x4_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vmov_n_s64(value: i64) -> int64x1_t {
+    vdup_n_s64(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29997,19 +31533,18 @@ pub fn vmvn_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_s32(a: int32x2_t) -> int32x2_t {
-    let b = int32x2_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vmov_n_s8(value: i8) -> int8x8_t {
+    vdup_n_s8(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30019,19 +31554,18 @@ pub fn vmvn_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_s8(a: int8x8_t) -> int8x8_t {
-    let b = int8x8_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vmov_n_u16(value: u16) -> uint16x4_t {
+    vdup_n_u16(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30041,19 +31575,18 @@ pub fn vmvn_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
-    let b = uint16x4_t::splat(65_535);
-    unsafe { simd_xor(a, b) }
+pub fn vmov_n_u32(value: u32) -> uint32x2_t {
+    vdup_n_u32(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(fmov)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30063,19 +31596,18 @@ pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
-    let b = uint32x2_t::splat(4_294_967_295);
-    unsafe { simd_xor(a, b) }
+pub fn vmov_n_u64(value: u64) -> uint64x1_t {
+    vdup_n_u64(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30085,19 +31617,18 @@ pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
-    let b = uint8x8_t::splat(255);
-    unsafe { simd_xor(a, b) }
+pub fn vmov_n_u8(value: u8) -> uint8x8_t {
+    vdup_n_u8(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_p8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30107,19 +31638,18 @@ pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
-    let b = poly8x16_t::splat(255);
-    unsafe { simd_xor(a, b) }
+pub fn vmovq_n_f32(value: f32) -> float32x4_t {
+    vdupq_n_f32(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30129,19 +31659,18 @@ pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
-    let b = int16x8_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vmovq_n_p16(value: p16) -> poly16x8_t {
+    vdupq_n_p16(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30151,19 +31680,18 @@ pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
-    let b = int32x4_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vmovq_n_p8(value: p8) -> poly8x16_t {
+    vdupq_n_p8(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30173,19 +31701,18 @@ pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
-    let b = int8x16_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vmovq_n_s16(value: i16) -> int16x8_t {
+    vdupq_n_s16(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30195,19 +31722,18 @@ pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
-    let b = uint16x8_t::splat(65_535);
-    unsafe { simd_xor(a, b) }
+pub fn vmovq_n_s32(value: i32) -> int32x4_t {
+    vdupq_n_s32(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30217,19 +31743,18 @@ pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
-    let b = uint32x4_t::splat(4_294_967_295);
-    unsafe { simd_xor(a, b) }
+pub fn vmovq_n_s64(value: i64) -> int64x2_t {
+    vdupq_n_s64(value)
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30239,63 +31764,60 @@ pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t {
-    let b = uint8x16_t::splat(255);
-    unsafe { simd_xor(a, b) }
+pub fn vmovq_n_s8(value: i8) -> int8x16_t {
+    vdupq_n_s8(value)
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u16)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fneg)
+    assert_instr(dup)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vneg_f16(a: float16x4_t) -> float16x4_t {
-    unsafe { simd_neg(a) }
+pub fn vmovq_n_u16(value: u16) -> uint16x8_t {
+    vdupq_n_u16(value)
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u32)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fneg)
+    assert_instr(dup)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vnegq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe { simd_neg(a) }
+pub fn vmovq_n_u32(value: u32) -> uint32x4_t {
+    vdupq_n_u32(value)
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fneg)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30305,18 +31827,18 @@ pub fn vnegq_f16(a: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vneg_f32(a: float32x2_t) -> float32x2_t {
-    unsafe { simd_neg(a) }
+pub fn vmovq_n_u64(value: u64) -> uint64x2_t {
+    vdupq_n_u64(value)
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fneg)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30326,18 +31848,18 @@ pub fn vneg_f32(a: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vnegq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe { simd_neg(a) }
+pub fn vmovq_n_u8(value: u8) -> uint8x16_t {
+    vdupq_n_u8(value)
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(sxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30347,18 +31869,18 @@ pub fn vnegq_f32(a: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vneg_s8(a: int8x8_t) -> int8x8_t {
-    unsafe { simd_neg(a) }
+pub fn vmovl_s16(a: int16x4_t) -> int32x4_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(sxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30368,18 +31890,18 @@ pub fn vneg_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vnegq_s8(a: int8x16_t) -> int8x16_t {
-    unsafe { simd_neg(a) }
+pub fn vmovl_s32(a: int32x2_t) -> int64x2_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(sxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30389,18 +31911,18 @@ pub fn vnegq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vneg_s16(a: int16x4_t) -> int16x4_t {
-    unsafe { simd_neg(a) }
+pub fn vmovl_s8(a: int8x8_t) -> int16x8_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(uxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30410,18 +31932,18 @@ pub fn vneg_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vnegq_s16(a: int16x8_t) -> int16x8_t {
-    unsafe { simd_neg(a) }
+pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(uxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30431,18 +31953,18 @@ pub fn vnegq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vneg_s32(a: int32x2_t) -> int32x2_t {
-    unsafe { simd_neg(a) }
+pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(uxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30452,18 +31974,18 @@ pub fn vneg_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vnegq_s32(a: int32x4_t) -> int32x4_t {
-    unsafe { simd_neg(a) }
+pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s16)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30473,19 +31995,18 @@ pub fn vnegq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    let c = int16x4_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vmovn_s16(a: int16x8_t) -> int8x8_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s32)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30495,19 +32016,18 @@ pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    let c = int32x2_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vmovn_s32(a: int32x4_t) -> int16x4_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s64)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30517,19 +32037,18 @@ pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    let c = int64x1_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vmovn_s64(a: int64x2_t) -> int32x2_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s8)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30539,19 +32058,18 @@ pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    let c = int8x8_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s16)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30561,19 +32079,18 @@ pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    let c = int16x8_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s32)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30583,63 +32100,62 @@ pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    let c = int32x4_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vmovn_u64(a: uint64x2_t) -> uint32x2_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(fmul)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    let c = int64x2_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(fmul)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    let c = int8x16_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30649,19 +32165,18 @@ pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    let c = int16x4_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30671,64 +32186,68 @@ pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    let c = int32x2_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    let c = int64x1_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmul_lane_f16<const LANE: i32>(a: float16x4_t, v: float16x4_t) -> float16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_lane_f16::<LANE>(v)) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    let c = int8x8_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_lane_f16::<LANE>(v)) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u16)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(fmul, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30737,20 +32256,21 @@ pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    let c = int16x8_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdup_lane_f32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u32)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(fmul, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30759,20 +32279,21 @@ pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    let c = int32x4_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_laneq_f32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u64)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(fmul, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30781,20 +32302,21 @@ pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    let c = int64x2_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdupq_lane_f32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u8)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(fmul, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30803,20 +32325,21 @@ pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    let c = int8x16_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_laneq_f32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30825,19 +32348,21 @@ pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_or(a, b) }
+pub fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_lane_s16::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30846,19 +32371,21 @@ pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe { simd_or(a, b) }
+pub fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_lane_s16::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30867,19 +32394,21 @@ pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_or(a, b) }
+pub fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdup_lane_s32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30888,19 +32417,21 @@ pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_or(a, b) }
+pub fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdupq_lane_s32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30909,19 +32440,21 @@ pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_or(a, b) }
+pub fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_lane_u16::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30930,19 +32463,21 @@ pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_or(a, b) }
+pub fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_lane_u16::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30951,19 +32486,21 @@ pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe { simd_or(a, b) }
+pub fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdup_lane_u32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30972,19 +32509,21 @@ pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_or(a, b) }
+pub fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdupq_lane_u32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30993,19 +32532,21 @@ pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_or(a, b) }
+pub fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_mul(a, vdup_laneq_s16::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31014,19 +32555,21 @@ pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_or(a, b) }
+pub fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_mul(a, vdupq_laneq_s16::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31035,19 +32578,21 @@ pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_or(a, b) }
+pub fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_laneq_s32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31056,19 +32601,21 @@ pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_or(a, b) }
+pub fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_laneq_s32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31077,19 +32624,21 @@ pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_or(a, b) }
+pub fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_mul(a, vdup_laneq_u16::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31098,19 +32647,21 @@ pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_or(a, b) }
+pub fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_mul(a, vdupq_laneq_u16::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31119,19 +32670,21 @@ pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe { simd_or(a, b) }
+pub fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_laneq_u32::<LANE>(b)) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31140,48 +32693,49 @@ pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_or(a, b) }
+pub fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_laneq_u32::<LANE>(b)) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s8)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    assert_instr(fmul)
 )]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t {
+    unsafe { simd_mul(a, vdup_n_f16(b)) }
+}
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmul)
 )]
-pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
-    let x: int16x4_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_s8(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_s8(b), a);
-    };
-    x
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t {
+    unsafe { simd_mul(a, vdupq_n_f16(b)) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s8)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31191,27 +32745,18 @@ pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
-    let x: int16x8_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_s8(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_s8(b), a);
-    };
-    x
+pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
+    unsafe { simd_mul(a, vdup_n_f32(b)) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s16)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31221,27 +32766,18 @@ pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
-    let x: int32x2_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_s16(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_s16(b), a);
-    };
-    x
+pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
+    unsafe { simd_mul(a, vdupq_n_f32(b)) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s16)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31251,27 +32787,18 @@ pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
-    let x: int32x4_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_s16(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_s16(b), a);
-    };
-    x
+pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    unsafe { simd_mul(a, vdup_n_s16(b)) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s32)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31281,27 +32808,18 @@ pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
-    let x: int64x1_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_s32(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_s32(b), a);
-    };
-    x
+pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    unsafe { simd_mul(a, vdupq_n_s16(b)) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s32)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31311,27 +32829,18 @@ pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
-    let x: int64x2_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_s32(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_s32(b), a);
-    };
-    x
+pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    unsafe { simd_mul(a, vdup_n_s32(b)) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u8)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31341,27 +32850,18 @@ pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
-    let x: uint16x4_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_u8(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_u8(b), a);
-    };
-    x
+pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    unsafe { simd_mul(a, vdupq_n_s32(b)) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u8)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31371,27 +32871,18 @@ pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
-    let x: uint16x8_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_u8(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_u8(b), a);
-    };
-    x
+pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
+    unsafe { simd_mul(a, vdup_n_u16(b)) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u16)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31401,27 +32892,18 @@ pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
-    let x: uint32x2_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_u16(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_u16(b), a);
-    };
-    x
+pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
+    unsafe { simd_mul(a, vdupq_n_u16(b)) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u16)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31431,27 +32913,18 @@ pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
-    let x: uint32x4_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_u16(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_u16(b), a);
-    };
-    x
+pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
+    unsafe { simd_mul(a, vdup_n_u32(b)) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u32)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31461,27 +32934,18 @@ pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
-    let x: uint64x1_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_u32(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_u32(b), a);
-    };
-    x
+pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
+    unsafe { simd_mul(a, vdupq_n_u32(b)) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u32)"]
+#[doc = "Polynomial multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(pmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31491,57 +32955,55 @@ pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
-    let x: uint64x2_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_u32(a, b);
+pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.pmul.v8i8"
+        )]
+        fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t;
     }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_u32(b), a);
-    };
-    x
+    unsafe { _vmul_p8(a, b) }
 }
-#[doc = "Floating-point add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"]
+#[doc = "Polynomial multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(faddp)
+    assert_instr(pmul)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.faddp.v4f16"
+            link_name = "llvm.aarch64.neon.pmul.v16i8"
         )]
-        fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+        fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t;
     }
-    unsafe { _vpadd_f16(a, b) }
+    unsafe { _vmulq_p8(a, b) }
 }
-#[doc = "Floating-point add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(faddp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31551,26 +33013,18 @@ pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.faddp.v2f32"
-        )]
-        fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vpadd_f32(a, b) }
+pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31580,26 +33034,18 @@ pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.addp.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")]
-        fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vpadd_s8(a, b) }
+pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31609,26 +33055,18 @@ pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.addp.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
-        fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vpadd_s16(a, b) }
+pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31638,26 +33076,18 @@ pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.addp.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")]
-        fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vpadd_s32(a, b) }
+pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31667,18 +33097,18 @@ pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) }
+pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31688,18 +33118,18 @@ pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) }
+pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31709,18 +33139,18 @@ pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) }
+pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31730,26 +33160,18 @@ pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")]
-        fn _vpaddl_s8(a: int8x8_t) -> int16x4_t;
-    }
-    unsafe { _vpaddl_s8(a) }
+pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31759,26 +33181,18 @@ pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")]
-        fn _vpaddlq_s8(a: int8x16_t) -> int16x8_t;
-    }
-    unsafe { _vpaddlq_s8(a) }
+pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31788,26 +33202,18 @@ pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")]
-        fn _vpaddl_s16(a: int16x4_t) -> int32x2_t;
-    }
-    unsafe { _vpaddl_s16(a) }
+pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31817,26 +33223,18 @@ pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")]
-        fn _vpaddlq_s16(a: int16x8_t) -> int32x4_t;
-    }
-    unsafe { _vpaddlq_s16(a) }
+pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31846,27 +33244,20 @@ pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")]
-        fn _vpaddl_s32(a: int32x2_t) -> int64x1_t;
-    }
-    unsafe { _vpaddl_s32(a) }
+pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(smull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31875,27 +33266,21 @@ pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")]
-        fn _vpaddlq_s32(a: int32x4_t) -> int64x2_t;
-    }
-    unsafe { _vpaddlq_s32(a) }
+pub fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_s16(a, vdup_lane_s16::<LANE>(b))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u8)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(smull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31904,27 +33289,21 @@ pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")]
-        fn _vpaddl_u8(a: uint8x8_t) -> uint16x4_t;
-    }
-    unsafe { _vpaddl_u8(a) }
+pub fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmull_s16(a, vdup_laneq_s16::<LANE>(b))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u8)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(smull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31933,27 +33312,21 @@ pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")]
-        fn _vpaddlq_u8(a: uint8x16_t) -> uint16x8_t;
-    }
-    unsafe { _vpaddlq_u8(a) }
+pub fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmull_s32(a, vdup_lane_s32::<LANE>(b))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u16)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(smull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31962,27 +33335,21 @@ pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")]
-        fn _vpaddl_u16(a: uint16x4_t) -> uint32x2_t;
-    }
-    unsafe { _vpaddl_u16(a) }
+pub fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_s32(a, vdup_laneq_s32::<LANE>(b))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u16)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(umull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31991,27 +33358,21 @@ pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")]
-        fn _vpaddlq_u16(a: uint16x8_t) -> uint32x4_t;
-    }
-    unsafe { _vpaddlq_u16(a) }
+pub fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_u16(a, vdup_lane_u16::<LANE>(b))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(umull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32020,27 +33381,21 @@ pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")]
-        fn _vpaddl_u32(a: uint32x2_t) -> uint64x1_t;
-    }
-    unsafe { _vpaddl_u32(a) }
+pub fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmull_u16(a, vdup_laneq_u16::<LANE>(b))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(umull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32049,27 +33404,21 @@ pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")]
-        fn _vpaddlq_u32(a: uint32x4_t) -> uint64x2_t;
-    }
-    unsafe { _vpaddlq_u32(a) }
+pub fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmull_u32(a, vdup_lane_u32::<LANE>(b))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxp)
+    assert_instr(umull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32078,26 +33427,19 @@ pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxp.v2f32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
-        fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vpmax_f32(a, b) }
+pub fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_u32(a, vdup_laneq_u32::<LANE>(b))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smaxp)
+    assert_instr(smull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32107,26 +33449,18 @@ pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smaxp.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")]
-        fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vpmax_s8(a, b) }
+pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
+    vmull_s16(a, vdup_n_s16(b))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smaxp)
+    assert_instr(smull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32136,26 +33470,18 @@ pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smaxp.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")]
-        fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vpmax_s16(a, b) }
+pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
+    vmull_s32(a, vdup_n_s32(b))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smaxp)
+    assert_instr(umull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32165,26 +33491,18 @@ pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smaxp.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")]
-        fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vpmax_s32(a, b) }
+pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
+    vmull_u16(a, vdup_n_u16(b))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umaxp)
+    assert_instr(umull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32194,26 +33512,18 @@ pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umaxp.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")]
-        fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vpmax_u8(a, b) }
+pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
+    vmull_u32(a, vdup_n_u32(b))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"]
+#[doc = "Polynomial multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umaxp)
+    assert_instr(pmull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32223,26 +33533,26 @@ pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+pub fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umaxp.v4i16"
+            link_name = "llvm.aarch64.neon.pmull.v8i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")]
-        fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i16")]
+        fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t;
     }
-    unsafe { _vpmax_u16(a, b) }
+    unsafe { _vmull_p8(a, b) }
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umaxp)
+    assert_instr(smull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32252,26 +33562,18 @@ pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umaxp.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")]
-        fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vpmax_u32(a, b) }
+pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminp)
+    assert_instr(smull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32281,26 +33583,18 @@ pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminp.v2f32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")]
-        fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vpmin_f32(a, b) }
+pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sminp)
+    assert_instr(smull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32310,26 +33604,18 @@ pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sminp.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
-        fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vpmin_s8(a, b) }
+pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sminp)
+    assert_instr(umull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32339,26 +33625,18 @@ pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sminp.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
-        fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vpmin_s16(a, b) }
+pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sminp)
+    assert_instr(umull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32368,26 +33646,18 @@ pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sminp.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
-        fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vpmin_s32(a, b) }
+pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uminp)
+    assert_instr(umull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32397,26 +33667,18 @@ pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uminp.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")]
-        fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vpmin_u8(a, b) }
+pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uminp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32426,26 +33688,19 @@ pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uminp.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")]
-        fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vpmin_u16(a, b) }
+pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
+    let b = poly8x8_t::splat(255);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uminp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32455,26 +33710,19 @@ pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uminp.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")]
-        fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vpmin_u32(a, b) }
+pub fn vmvn_s16(a: int16x4_t) -> int16x4_t {
+    let b = int16x4_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32484,26 +33732,19 @@ pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabs_s8(a: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
-        fn _vqabs_s8(a: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vqabs_s8(a) }
+pub fn vmvn_s32(a: int32x2_t) -> int32x2_t {
+    let b = int32x2_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32513,26 +33754,19 @@ pub fn vqabs_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
-        fn _vqabsq_s8(a: int8x16_t) -> int8x16_t;
-    }
-    unsafe { _vqabsq_s8(a) }
+pub fn vmvn_s8(a: int8x8_t) -> int8x8_t {
+    let b = int8x8_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32542,26 +33776,19 @@ pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabs_s16(a: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
-        fn _vqabs_s16(a: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vqabs_s16(a) }
+pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
+    let b = uint16x4_t::splat(65_535);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32571,26 +33798,19 @@ pub fn vqabs_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
-        fn _vqabsq_s16(a: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vqabsq_s16(a) }
+pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
+    let b = uint32x2_t::splat(4_294_967_295);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32600,26 +33820,19 @@ pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabs_s32(a: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
-        fn _vqabs_s32(a: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vqabs_s32(a) }
+pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
+    let b = uint8x8_t::splat(255);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32629,26 +33842,19 @@ pub fn vqabs_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
-        fn _vqabsq_s32(a: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vqabsq_s32(a) }
+pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
+    let b = poly8x16_t::splat(255);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32658,18 +33864,19 @@ pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
+    let b = int16x8_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32679,18 +33886,19 @@ pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
+    let b = int32x4_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32700,18 +33908,19 @@ pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
+    let b = int8x16_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32721,18 +33930,19 @@ pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
+    let b = uint16x8_t::splat(65_535);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32742,18 +33952,19 @@ pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
+    let b = uint32x4_t::splat(4_294_967_295);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32763,60 +33974,63 @@ pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t {
+    let b = uint8x16_t::splat(255);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(fneg)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe { simd_saturating_add(a, b) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vneg_f16(a: float16x4_t) -> float16x4_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(fneg)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_saturating_add(a, b) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vnegq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(fneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32826,18 +34040,18 @@ pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vneg_f32(a: float32x2_t) -> float32x2_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(fneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32847,18 +34061,18 @@ pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vnegq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32868,18 +34082,18 @@ pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vneg_s8(a: int8x8_t) -> int8x8_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32889,18 +34103,18 @@ pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vnegq_s8(a: int8x16_t) -> int8x16_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32910,18 +34124,18 @@ pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vneg_s16(a: int16x4_t) -> int16x4_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32931,18 +34145,18 @@ pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vnegq_s16(a: int16x8_t) -> int16x8_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32952,18 +34166,18 @@ pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vneg_s32(a: int32x2_t) -> int32x2_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32973,24 +34187,19 @@ pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vnegq_s32(a: int32x4_t) -> int32x4_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(sqdmlal, N = 2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32999,25 +34208,20 @@ pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
+pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    let c = int16x4_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(sqdmlal, N = 1)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33026,19 +34230,19 @@ pub fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
+pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    let c = int32x2_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33048,18 +34252,19 @@ pub fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_n_s16(b, c))
+pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    let c = int64x1_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33069,18 +34274,19 @@ pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_n_s32(b, c))
+pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    let c = int8x8_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33090,18 +34296,19 @@ pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_s16(b, c))
+pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    let c = int16x8_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33111,24 +34318,20 @@ pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_s32(b, c))
+pub fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    let c = int32x4_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(sqdmlsl, N = 2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33137,25 +34340,20 @@ pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
+pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    let c = int64x2_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(sqdmlsl, N = 1)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33164,19 +34362,19 @@ pub fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
+pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    let c = int8x16_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33186,18 +34384,19 @@ pub fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_n_s16(b, c))
+pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    let c = int16x4_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33207,18 +34406,19 @@ pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_n_s32(b, c))
+pub fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    let c = int32x2_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33228,18 +34428,19 @@ pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_s16(b, c))
+pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    let c = int64x1_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33249,20 +34450,20 @@ pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_s32(b, c))
+pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    let c = int8x8_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33271,21 +34472,20 @@ pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vqdmulh_s16(a, vdup_n_s16(vgetq_lane_s16::<LANE>(b)))
+pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    let c = int16x8_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33294,21 +34494,20 @@ pub fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vqdmulhq_s16(a, vdupq_n_s16(vgetq_lane_s16::<LANE>(b)))
+pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    let c = int32x4_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33317,21 +34516,20 @@ pub fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmulh_s32(a, vdup_n_s32(vgetq_lane_s32::<LANE>(b)))
+pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    let c = int64x2_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33340,19 +34538,19 @@ pub fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmulhq_s32(a, vdupq_n_s32(vgetq_lane_s32::<LANE>(b)))
+pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    let c = int8x16_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33362,19 +34560,18 @@ pub fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    let b: int16x4_t = vdup_n_s16(b);
-    vqdmulh_s16(a, b)
+pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33384,19 +34581,18 @@ pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    let b: int16x8_t = vdupq_n_s16(b);
-    vqdmulhq_s16(a, b)
+pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33406,19 +34602,18 @@ pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    let b: int32x2_t = vdup_n_s32(b);
-    vqdmulh_s32(a, b)
+pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33428,19 +34623,18 @@ pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    let b: int32x4_t = vdupq_n_s32(b);
-    vqdmulhq_s32(a, b)
+pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33450,26 +34644,18 @@ pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v4i16"
-        )]
-        fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vqdmulh_s16(a, b) }
+pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33479,26 +34665,18 @@ pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v8i16"
-        )]
-        fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vqdmulhq_s16(a, b) }
+pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33508,26 +34686,18 @@ pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v2i32"
-        )]
-        fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vqdmulh_s32(a, b) }
+pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33537,28 +34707,19 @@ pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v4i32"
-        )]
-        fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vqdmulhq_s32(a, b) }
+pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector saturating doubling long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33567,22 +34728,19 @@ pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    let b = vdup_lane_s16::<N>(b);
-    vqdmull_s16(a, b)
+pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector saturating doubling long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull, N = 1)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33591,20 +34749,18 @@ pub fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    let b = vdup_lane_s32::<N>(b);
-    vqdmull_s32(a, b)
+pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector saturating doubling long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33614,18 +34770,18 @@ pub fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
-    vqdmull_s16(a, vdup_n_s16(b))
+pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector saturating doubling long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33635,18 +34791,18 @@ pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
-    vqdmull_s32(a, vdup_n_s32(b))
+pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33656,26 +34812,18 @@ pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmull.v4i32"
-        )]
-        fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
-    }
-    unsafe { _vqdmull_s16(a, b) }
+pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33685,26 +34833,18 @@ pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmull.v2i64"
-        )]
-        fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
-    }
-    unsafe { _vqdmull_s32(a, b) }
+pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33714,26 +34854,18 @@ pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtn.v8i8"
-        )]
-        fn _vqmovn_s16(a: int16x8_t) -> int8x8_t;
-    }
-    unsafe { _vqmovn_s16(a) }
+pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33743,26 +34875,18 @@ pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtn.v4i16"
-        )]
-        fn _vqmovn_s32(a: int32x4_t) -> int16x4_t;
-    }
-    unsafe { _vqmovn_s32(a) }
+pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtn)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33772,26 +34896,27 @@ pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtn.v2i32"
-        )]
-        fn _vqmovn_s64(a: int64x2_t) -> int32x2_t;
+pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
+    let x: int16x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_s8(a, b);
     }
-    unsafe { _vqmovn_s64(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_s8(b), a);
+    };
+    x
 }
-#[doc = "Unsigned saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqxtn)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33801,26 +34926,27 @@ pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqxtn.v8i8"
-        )]
-        fn _vqmovn_u16(a: uint16x8_t) -> uint8x8_t;
+pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
+    let x: int16x8_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_s8(a, b);
     }
-    unsafe { _vqmovn_u16(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_s8(b), a);
+    };
+    x
 }
-#[doc = "Unsigned saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqxtn)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33830,26 +34956,27 @@ pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqxtn.v4i16"
-        )]
-        fn _vqmovn_u32(a: uint32x4_t) -> uint16x4_t;
+pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
+    let x: int32x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_s16(a, b);
     }
-    unsafe { _vqmovn_u32(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_s16(b), a);
+    };
+    x
 }
-#[doc = "Unsigned saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqxtn)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33859,26 +34986,27 @@ pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqxtn.v2i32"
-        )]
-        fn _vqmovn_u64(a: uint64x2_t) -> uint32x2_t;
+pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
+    let x: int32x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_s16(a, b);
     }
-    unsafe { _vqmovn_u64(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_s16(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtun)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33888,26 +35016,27 @@ pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtun.v8i8"
-        )]
-        fn _vqmovun_s16(a: int16x8_t) -> uint8x8_t;
+pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
+    let x: int64x1_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_s32(a, b);
     }
-    unsafe { _vqmovun_s16(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_s32(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtun)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33917,26 +35046,27 @@ pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtun.v4i16"
-        )]
-        fn _vqmovun_s32(a: int32x4_t) -> uint16x4_t;
+pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
+    let x: int64x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_s32(a, b);
     }
-    unsafe { _vqmovun_s32(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_s32(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtun)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33946,26 +35076,27 @@ pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtun.v2i32"
-        )]
-        fn _vqmovun_s64(a: int64x2_t) -> uint32x2_t;
+pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
+    let x: uint16x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_u8(a, b);
     }
-    unsafe { _vqmovun_s64(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_u8(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33975,26 +35106,27 @@ pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqneg_s8(a: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")]
-        fn _vqneg_s8(a: int8x8_t) -> int8x8_t;
+pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
+    let x: uint16x8_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_u8(a, b);
     }
-    unsafe { _vqneg_s8(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_u8(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34004,26 +35136,27 @@ pub fn vqneg_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")]
-        fn _vqnegq_s8(a: int8x16_t) -> int8x16_t;
+pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
+    let x: uint32x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_u16(a, b);
     }
-    unsafe { _vqnegq_s8(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_u16(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34033,26 +35166,27 @@ pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqneg_s16(a: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")]
-        fn _vqneg_s16(a: int16x4_t) -> int16x4_t;
+pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
+    let x: uint32x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_u16(a, b);
     }
-    unsafe { _vqneg_s16(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_u16(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34062,26 +35196,27 @@ pub fn vqneg_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqnegq_s16(a: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")]
-        fn _vqnegq_s16(a: int16x8_t) -> int16x8_t;
+pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
+    let x: uint64x1_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_u32(a, b);
     }
-    unsafe { _vqnegq_s16(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_u32(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34091,81 +35226,96 @@ pub fn vqnegq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqneg_s32(a: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")]
-        fn _vqneg_s32(a: int32x2_t) -> int32x2_t;
+pub fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
+    let x: uint64x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_u32(a, b);
     }
-    unsafe { _vqneg_s32(a) }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_u32(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
+#[cfg(target_endian = "little")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(faddp)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v4i32"
+            link_name = "llvm.aarch64.neon.faddp.v4f16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")]
-        fn _vqnegq_s32(a: int32x4_t) -> int32x4_t;
+        fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
     }
-    unsafe { _vqnegq_s32(a) }
+    unsafe { _vpadd_f16(a, b) }
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg(target_endian = "big")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(faddp)
 )]
-#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let b = vdup_lane_s16::<LANE>(b);
-    vqrdmulh_s16(a, b)
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.faddp.v4f16"
+        )]
+        fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = _vpadd_f16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(faddp)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -34174,22 +35324,28 @@ pub fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let b = vdup_lane_s32::<LANE>(b);
-    vqrdmulh_s32(a, b)
+pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.faddp.v2f32"
+        )]
+        fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vpadd_f32(a, b) }
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(faddp)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -34198,22 +35354,33 @@ pub fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    let b = vdup_laneq_s16::<LANE>(b);
-    vqrdmulh_s16(a, b)
+pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.faddp.v2f32"
+        )]
+        fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = _vpadd_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(addp)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -34222,22 +35389,28 @@ pub fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let b = vdup_laneq_s32::<LANE>(b);
-    vqrdmulh_s32(a, b)
+pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")]
+        fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vpadd_s8(a, b) }
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(addp)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -34246,22 +35419,33 @@ pub fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let b = vdupq_lane_s16::<LANE>(b);
-    vqrdmulhq_s16(a, b)
+pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")]
+        fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = _vpadd_s8(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(addp)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -34270,22 +35454,28 @@ pub fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let b = vdupq_lane_s32::<LANE>(b);
-    vqrdmulhq_s32(a, b)
+pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
+        fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vpadd_s16(a, b) }
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(addp)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -34294,22 +35484,33 @@ pub fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    let b = vdupq_laneq_s16::<LANE>(b);
-    vqrdmulhq_s16(a, b)
+pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
+        fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = _vpadd_s16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(addp)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -34318,20 +35519,27 @@ pub fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let b = vdupq_laneq_s32::<LANE>(b);
-    vqrdmulhq_s32(a, b)
+pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")]
+        fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vpadd_s32(a, b) }
 }
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34341,18 +35549,31 @@ pub fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    vqrdmulh_s16(a, vdup_n_s16(b))
+pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")]
+        fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = _vpadd_s32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34362,18 +35583,18 @@ pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    vqrdmulhq_s16(a, vdupq_n_s16(b))
+pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) }
 }
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34383,18 +35604,18 @@ pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    vqrdmulh_s32(a, vdup_n_s32(b))
+pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) }
 }
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34404,18 +35625,18 @@ pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    vqrdmulhq_s32(a, vdupq_n_s32(b))
+pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) }
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34425,26 +35646,26 @@ pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v4i16"
+            link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8"
         )]
-        fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")]
+        fn _vpaddl_s8(a: int8x8_t) -> int16x4_t;
     }
-    unsafe { _vqrdmulh_s16(a, b) }
+    unsafe { _vpaddl_s8(a) }
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34454,26 +35675,26 @@ pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v8i16"
+            link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8"
         )]
-        fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")]
+        fn _vpaddlq_s8(a: int8x16_t) -> int16x8_t;
     }
-    unsafe { _vqrdmulhq_s16(a, b) }
+    unsafe { _vpaddlq_s8(a) }
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34483,26 +35704,26 @@ pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v2i32"
+            link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16"
         )]
-        fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")]
+        fn _vpaddl_s16(a: int16x4_t) -> int32x2_t;
     }
-    unsafe { _vqrdmulh_s32(a, b) }
+    unsafe { _vpaddl_s16(a) }
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34512,26 +35733,26 @@ pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v4i32"
+            link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16"
         )]
-        fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")]
+        fn _vpaddlq_s16(a: int16x8_t) -> int32x4_t;
     }
-    unsafe { _vqrdmulhq_s32(a, b) }
+    unsafe { _vpaddlq_s16(a) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34541,26 +35762,26 @@ pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v8i8"
+            link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32"
         )]
-        fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")]
+        fn _vpaddl_s32(a: int32x2_t) -> int64x1_t;
     }
-    unsafe { _vqrshl_s8(a, b) }
+    unsafe { _vpaddl_s32(a) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34570,26 +35791,26 @@ pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v16i8"
+            link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32"
         )]
-        fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")]
+        fn _vpaddlq_s32(a: int32x4_t) -> int64x2_t;
     }
-    unsafe { _vqrshlq_s8(a, b) }
+    unsafe { _vpaddlq_s32(a) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34599,26 +35820,26 @@ pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v4i16"
+            link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8"
         )]
-        fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")]
+        fn _vpaddl_u8(a: uint8x8_t) -> uint16x4_t;
     }
-    unsafe { _vqrshl_s16(a, b) }
+    unsafe { _vpaddl_u8(a) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34628,26 +35849,26 @@ pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v8i16"
+            link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8"
         )]
-        fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")]
+        fn _vpaddlq_u8(a: uint8x16_t) -> uint16x8_t;
     }
-    unsafe { _vqrshlq_s16(a, b) }
+    unsafe { _vpaddlq_u8(a) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34657,26 +35878,26 @@ pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v2i32"
+            link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16"
         )]
-        fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")]
+        fn _vpaddl_u16(a: uint16x4_t) -> uint32x2_t;
     }
-    unsafe { _vqrshl_s32(a, b) }
+    unsafe { _vpaddl_u16(a) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34686,26 +35907,26 @@ pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v4i32"
+            link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16"
         )]
-        fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")]
+        fn _vpaddlq_u16(a: uint16x8_t) -> uint32x4_t;
     }
-    unsafe { _vqrshlq_s32(a, b) }
+    unsafe { _vpaddlq_u16(a) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34715,26 +35936,26 @@ pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v1i64"
+            link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32"
         )]
-        fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")]
+        fn _vpaddl_u32(a: uint32x2_t) -> uint64x1_t;
     }
-    unsafe { _vqrshl_s64(a, b) }
+    unsafe { _vpaddl_u32(a) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34744,26 +35965,27 @@ pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v2i64"
+            link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32"
         )]
-        fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")]
+        fn _vpaddlq_u32(a: uint32x4_t) -> uint64x2_t;
     }
-    unsafe { _vqrshlq_s64(a, b) }
+    unsafe { _vpaddlq_u32(a) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(fmaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34773,26 +35995,27 @@ pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v8i8"
+            link_name = "llvm.aarch64.neon.fmaxp.v2f32"
         )]
-        fn _vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
+        fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
-    unsafe { _vqrshl_u8(a, b) }
+    unsafe { _vpmax_f32(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(fmaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34802,26 +36025,32 @@ pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v16i8"
+            link_name = "llvm.aarch64.neon.fmaxp.v2f32"
         )]
-        fn _vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
+        fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = _vpmax_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
-    unsafe { _vqrshlq_u8(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(smaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34831,26 +36060,27 @@ pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v4i16"
+            link_name = "llvm.aarch64.neon.smaxp.v8i8"
         )]
-        fn _vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")]
+        fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    unsafe { _vqrshl_u16(a, b) }
+    unsafe { _vpmax_s8(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(smaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34860,26 +36090,32 @@ pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v8i16"
+            link_name = "llvm.aarch64.neon.smaxp.v8i8"
         )]
-        fn _vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")]
+        fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = _vpmax_s8(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
-    unsafe { _vqrshlq_u16(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(smaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34889,26 +36125,27 @@ pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v2i32"
+            link_name = "llvm.aarch64.neon.smaxp.v4i16"
         )]
-        fn _vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")]
+        fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    unsafe { _vqrshl_u32(a, b) }
+    unsafe { _vpmax_s16(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(smaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34918,26 +36155,32 @@ pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v4i32"
+            link_name = "llvm.aarch64.neon.smaxp.v4i16"
         )]
-        fn _vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")]
+        fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = _vpmax_s16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vqrshlq_u32(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(smaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34947,26 +36190,27 @@ pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v1i64"
+            link_name = "llvm.aarch64.neon.smaxp.v2i32"
         )]
-        fn _vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")]
+        fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    unsafe { _vqrshl_u64(a, b) }
+    unsafe { _vpmax_s32(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(smaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34976,343 +36220,193 @@ pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v2i64"
+            link_name = "llvm.aarch64.neon.smaxp.v2i32"
         )]
-        fn _vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")]
+        fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    unsafe { _vqrshlq_u64(a, b) }
-}
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")]
-        fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = _vpmax_s32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
-    unsafe { _vqrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
-        fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")]
+        fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
     }
-    unsafe { _vqrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) }
+    unsafe { _vpmax_u8(a, b) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
-        fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")]
+        fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+    }
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = _vpmax_u8(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
-    unsafe { _vqrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrn.v8i8"
+            link_name = "llvm.aarch64.neon.umaxp.v4i16"
         )]
-        fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")]
+        fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
     }
-    unsafe { _vqrshrn_n_s16(a, N) }
+    unsafe { _vpmax_u16(a, b) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrn.v4i16"
+            link_name = "llvm.aarch64.neon.umaxp.v4i16"
         )]
-        fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")]
+        fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+    }
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = _vpmax_u16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vqrshrn_n_s32(a, N) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrn.v2i32"
+            link_name = "llvm.aarch64.neon.umaxp.v2i32"
         )]
-        fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")]
+        fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
     }
-    unsafe { _vqrshrn_n_s64(a, N) }
+    unsafe { _vpmax_u32(a, b) }
 }
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
-        fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqrshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) }
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
-        fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqrshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) }
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
-        fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) }
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshrn.v8i8"
-        )]
-        fn _vqrshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t;
-    }
-    unsafe { _vqrshrn_n_u16(a, N) }
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshrn.v4i16"
-        )]
-        fn _vqrshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t;
-    }
-    unsafe { _vqrshrn_n_u32(a, N) }
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshrn.v2i32"
-        )]
-        fn _vqrshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t;
-    }
-    unsafe { _vqrshrn_n_u64(a, N) }
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")]
-        fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqrshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")]
-        fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqrshrun_n_s32(a, const { int32x4_t([-N; 4]) }) }
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")]
-        fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqrshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrun.v8i8"
-        )]
-        fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t;
-    }
-    unsafe { _vqrshrun_n_s16(a, N) }
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrun.v4i16"
-        )]
-        fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t;
-    }
-    unsafe { _vqrshrun_n_s32(a, N) }
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrun.v2i32"
-        )]
-        fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t;
-    }
-    unsafe { _vqrshrun_n_s64(a, N) }
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(umaxp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35321,21 +36415,33 @@ pub fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshl_s8(a, vdup_n_s8(N as _))
+pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")]
+        fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+    }
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x2_t = _vpmax_u32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(fminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35344,21 +36450,28 @@ pub fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshlq_s8(a, vdupq_n_s8(N as _))
+pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminp.v2f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")]
+        fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vpmin_f32(a, b) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(fminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35367,21 +36480,33 @@ pub fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshl_s16(a, vdup_n_s16(N as _))
+pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminp.v2f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")]
+        fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = _vpmin_f32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(sminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35390,21 +36515,28 @@ pub fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshlq_s16(a, vdupq_n_s16(N as _))
+pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
+        fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vpmin_s8(a, b) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(sminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35413,21 +36545,33 @@ pub fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshl_s32(a, vdup_n_s32(N as _))
+pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
+        fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = _vpmin_s8(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(sminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35436,21 +36580,28 @@ pub fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshlq_s32(a, vdupq_n_s32(N as _))
+pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
+        fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vpmin_s16(a, b) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(sminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35459,21 +36610,33 @@ pub fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshl_s64(a, vdup_n_s64(N as _))
+pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
+        fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = _vpmin_s16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(sminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35482,21 +36645,28 @@ pub fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshlq_s64(a, vdupq_n_s64(N as _))
+pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
+        fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vpmin_s32(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(sminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35505,21 +36675,33 @@ pub fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshl_u8(a, vdup_n_s8(N as _))
+pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
+        fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = _vpmin_s32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(uminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35528,21 +36710,28 @@ pub fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshlq_u8(a, vdupq_n_s8(N as _))
+pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")]
+        fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+    }
+    unsafe { _vpmin_u8(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(uminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35551,21 +36740,33 @@ pub fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshl_u16(a, vdup_n_s16(N as _))
+pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")]
+        fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+    }
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = _vpmin_u8(a, b);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(uminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35574,21 +36775,28 @@ pub fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshlq_u16(a, vdupq_n_s16(N as _))
+pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")]
+        fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+    }
+    unsafe { _vpmin_u16(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(uminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35597,21 +36805,33 @@ pub fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshl_u32(a, vdup_n_s32(N as _))
+pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")]
+        fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+    }
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = _vpmin_u16(a, b);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(uminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35620,21 +36840,28 @@ pub fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshlq_u32(a, vdupq_n_s32(N as _))
+pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")]
+        fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vpmin_u32(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(uminp)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35643,21 +36870,32 @@ pub fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshl_u64(a, vdup_n_s64(N as _))
+pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")]
+        fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+    }
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x2_t = _vpmin_u32(a, b);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(sqabs)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -35666,19 +36904,26 @@ pub fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshlq_u64(a, vdupq_n_s64(N as _))
+pub fn vqabs_s8(a: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
+        fn _vqabs_s8(a: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vqabs_s8(a) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35688,26 +36933,26 @@ pub fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v8i8"
+            link_name = "llvm.aarch64.neon.sqabs.v16i8"
         )]
-        fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
+        fn _vqabsq_s8(a: int8x16_t) -> int8x16_t;
     }
-    unsafe { _vqshl_s8(a, b) }
+    unsafe { _vqabsq_s8(a) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35717,26 +36962,26 @@ pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+pub fn vqabs_s16(a: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v16i8"
+            link_name = "llvm.aarch64.neon.sqabs.v4i16"
         )]
-        fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
+        fn _vqabs_s16(a: int16x4_t) -> int16x4_t;
     }
-    unsafe { _vqshlq_s8(a, b) }
+    unsafe { _vqabs_s16(a) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35746,26 +36991,26 @@ pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v4i16"
+            link_name = "llvm.aarch64.neon.sqabs.v8i16"
         )]
-        fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
+        fn _vqabsq_s16(a: int16x8_t) -> int16x8_t;
     }
-    unsafe { _vqshl_s16(a, b) }
+    unsafe { _vqabsq_s16(a) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35775,26 +37020,26 @@ pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+pub fn vqabs_s32(a: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v8i16"
+            link_name = "llvm.aarch64.neon.sqabs.v2i32"
         )]
-        fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
+        fn _vqabs_s32(a: int32x2_t) -> int32x2_t;
     }
-    unsafe { _vqshlq_s16(a, b) }
+    unsafe { _vqabs_s32(a) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35804,26 +37049,26 @@ pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v2i32"
+            link_name = "llvm.aarch64.neon.sqabs.v4i32"
         )]
-        fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
+        fn _vqabsq_s32(a: int32x4_t) -> int32x4_t;
     }
-    unsafe { _vqshl_s32(a, b) }
+    unsafe { _vqabsq_s32(a) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35833,26 +37078,18 @@ pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v4i32"
-        )]
-        fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vqshlq_s32(a, b) }
+pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35862,26 +37099,18 @@ pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v1i64"
-        )]
-        fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-    unsafe { _vqshl_s64(a, b) }
+pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35891,26 +37120,18 @@ pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v2i64"
-        )]
-        fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-    unsafe { _vqshlq_s64(a, b) }
+pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35920,26 +37141,18 @@ pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v8i8"
-        )]
-        fn _vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqshl_u8(a, b) }
+pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35949,26 +37162,18 @@ pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v16i8"
-        )]
-        fn _vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vqshlq_u8(a, b) }
+pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35978,26 +37183,18 @@ pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v4i16"
-        )]
-        fn _vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqshl_u16(a, b) }
+pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36007,26 +37204,18 @@ pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v8i16"
-        )]
-        fn _vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
-    }
-    unsafe { _vqshlq_u16(a, b) }
+pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36036,26 +37225,18 @@ pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v2i32"
-        )]
-        fn _vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqshl_u32(a, b) }
+pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36065,26 +37246,18 @@ pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v4i32"
-        )]
-        fn _vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vqshlq_u32(a, b) }
+pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36094,26 +37267,18 @@ pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v1i64"
-        )]
-        fn _vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
-    }
-    unsafe { _vqshl_u64(a, b) }
+pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36123,622 +37288,401 @@ pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v2i64"
-        )]
-        fn _vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
-    }
-    unsafe { _vqshlq_u64(a, b) }
+pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")]
-        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) }
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")]
-        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) }
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")]
-        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) }
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")]
-        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
-    }
-    unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")]
-        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")]
-        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) }
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")]
-        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
-    }
-    unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) }
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")]
-        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
-    }
-    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) }
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sqdmlal, N = 2)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v8i8"
-        )]
-        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sqdmlal, N = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v16i8"
-        )]
-        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_n_s16(b, c))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v4i16"
-        )]
-        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_n_s32(b, c))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v8i16"
-        )]
-        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
-    }
-    unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_s16(b, c))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v2i32"
-        )]
-        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_s32(b, c))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v4i32"
-        )]
-        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sqdmlsl, N = 2)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v1i64"
-        )]
-        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
-    }
-    unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sqdmlsl, N = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v2i64"
-        )]
-        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
-    }
-    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_n_s16(b, c))
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
-        fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-    unsafe { _vqshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_n_s32(b, c))
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")]
-        fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
-    }
-    unsafe { _vqshrn_n_s32(a, const { int32x4_t([-N; 4]) }) }
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_s16(b, c))
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")]
-        fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
-    }
-    unsafe { _vqshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_s32(b, c))
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrn.v8i8"
-        )]
-        fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-    unsafe { _vqshrn_n_s16(a, N) }
-}
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrn.v4i16"
-        )]
-        fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
-    }
-    unsafe { _vqshrn_n_s32(a, N) }
-}
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrn.v2i32"
-        )]
-        fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
-    }
-    unsafe { _vqshrn_n_s64(a, N) }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
-        fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
-        fn _vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
-        fn _vqshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshrn.v8i8"
-        )]
-        fn _vqshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t;
-    }
-    unsafe { _vqshrn_n_u16(a, N) }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshrn.v4i16"
-        )]
-        fn _vqshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t;
-    }
-    unsafe { _vqshrn_n_u32(a, N) }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshrn.v2i32"
-        )]
-        fn _vqshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t;
-    }
-    unsafe { _vqshrn_n_u64(a, N) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
-        fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
-        fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqshrun_n_s32(a, const { int32x4_t([-N; 4]) }) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
-        fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrun.v8i8"
-        )]
-        fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t;
-    }
-    unsafe { _vqshrun_n_s16(a, N) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrun.v4i16"
-        )]
-        fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t;
-    }
-    unsafe { _vqshrun_n_s32(a, N) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrun.v2i32"
-        )]
-        fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t;
-    }
-    unsafe { _vqshrun_n_s64(a, N) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(sqdmulh, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36747,19 +37691,21 @@ pub fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vqdmulh_s16(a, vdup_n_s16(vgetq_lane_s16::<LANE>(b)))
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(sqdmulh, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36768,19 +37714,21 @@ pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vqdmulhq_s16(a, vdupq_n_s16(vgetq_lane_s16::<LANE>(b)))
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(sqdmulh, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36789,19 +37737,21 @@ pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmulh_s32(a, vdup_n_s32(vgetq_lane_s32::<LANE>(b)))
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(sqdmulh, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36810,18 +37760,19 @@ pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmulhq_s32(a, vdupq_n_s32(vgetq_lane_s32::<LANE>(b)))
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"]
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36831,18 +37782,19 @@ pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    let b: int16x4_t = vdup_n_s16(b);
+    vqdmulh_s16(a, b)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"]
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36852,18 +37804,19 @@ pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    let b: int16x8_t = vdupq_n_s16(b);
+    vqdmulhq_s16(a, b)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"]
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36873,18 +37826,19 @@ pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    let b: int32x2_t = vdup_n_s32(b);
+    vqdmulh_s32(a, b)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"]
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36894,18 +37848,19 @@ pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    let b: int32x4_t = vdupq_n_s32(b);
+    vqdmulhq_s32(a, b)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36915,18 +37870,26 @@ pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmulh.v4i16"
+        )]
+        fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vqdmulh_s16(a, b) }
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36936,18 +37899,26 @@ pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmulh.v8i16"
+        )]
+        fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    unsafe { _vqdmulhq_s16(a, b) }
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36957,18 +37928,26 @@ pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmulh.v2i32"
+        )]
+        fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vqdmulh_s32(a, b) }
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36978,19 +37957,28 @@ pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmulh.v4i32"
+        )]
+        fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    unsafe { _vqdmulhq_s32(a, b) }
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"]
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(sqdmull, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36999,19 +37987,22 @@ pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    let b = vdup_lane_s16::<N>(b);
+    vqdmull_s16(a, b)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"]
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(sqdmull, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37020,18 +38011,20 @@ pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    let b = vdup_lane_s32::<N>(b);
+    vqdmull_s32(a, b)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"]
+#[doc = "Vector saturating doubling long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(sqdmull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37041,18 +38034,18 @@ pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
+    vqdmull_s16(a, vdup_n_s16(b))
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"]
+#[doc = "Vector saturating doubling long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(sqdmull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37062,22 +38055,18 @@ pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_saturating_sub(a, b) }
+pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
+    vqdmull_s32(a, vdup_n_s32(b))
 }
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(raddhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37087,23 +38076,26 @@ pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let x = vraddhn_s16(b, c);
-    vcombine_s8(a, x)
+pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmull.v4i32"
+        )]
+        fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
+    }
+    unsafe { _vqdmull_s16(a, b) }
 }
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(raddhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37113,23 +38105,26 @@ pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let x = vraddhn_s32(b, c);
-    vcombine_s16(a, x)
+pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmull.v2i64"
+        )]
+        fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
+    }
+    unsafe { _vqdmull_s32(a, b) }
 }
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(raddhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37139,23 +38134,26 @@ pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let x = vraddhn_s64(b, c);
-    vcombine_s32(a, x)
+pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v8i8"
+        )]
+        fn _vqmovn_s16(a: int16x8_t) -> int8x8_t;
+    }
+    unsafe { _vqmovn_s16(a) }
 }
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(raddhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37165,25 +38163,26 @@ pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    unsafe {
-        let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c)));
-        vcombine_u8(a, x)
+pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v4i16"
+        )]
+        fn _vqmovn_s32(a: int32x4_t) -> int16x4_t;
     }
+    unsafe { _vqmovn_s32(a) }
 }
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u32)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(raddhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37193,25 +38192,26 @@ pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    unsafe {
-        let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c)));
-        vcombine_u16(a, x)
+pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v2i32"
+        )]
+        fn _vqmovn_s64(a: int64x2_t) -> int32x2_t;
     }
+    unsafe { _vqmovn_s64(a) }
 }
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u64)"]
+#[doc = "Unsigned saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(raddhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37221,21 +38221,26 @@ pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    unsafe {
-        let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c)));
-        vcombine_u32(a, x)
+pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqxtn.v8i8"
+        )]
+        fn _vqmovn_u16(a: uint16x8_t) -> uint8x8_t;
     }
+    unsafe { _vqmovn_u16(a) }
 }
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s16)"]
+#[doc = "Unsigned saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(uqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37245,26 +38250,26 @@ pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.raddhn.v8i8"
+            link_name = "llvm.aarch64.neon.uqxtn.v4i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")]
-        fn _vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t;
+        fn _vqmovn_u32(a: uint32x4_t) -> uint16x4_t;
     }
-    unsafe { _vraddhn_s16(a, b) }
+    unsafe { _vqmovn_u32(a) }
 }
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s32)"]
+#[doc = "Unsigned saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(uqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37274,26 +38279,26 @@ pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.raddhn.v4i16"
+            link_name = "llvm.aarch64.neon.uqxtn.v2i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")]
-        fn _vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t;
+        fn _vqmovn_u64(a: uint64x2_t) -> uint32x2_t;
     }
-    unsafe { _vraddhn_s32(a, b) }
+    unsafe { _vqmovn_u64(a) }
 }
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s64)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(sqxtun)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37303,26 +38308,26 @@ pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.raddhn.v2i32"
+            link_name = "llvm.aarch64.neon.sqxtun.v8i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")]
-        fn _vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t;
+        fn _vqmovun_s16(a: int16x8_t) -> uint8x8_t;
     }
-    unsafe { _vraddhn_s64(a, b) }
+    unsafe { _vqmovun_s16(a) }
 }
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(sqxtun)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37332,18 +38337,26 @@ pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
-    unsafe { transmute(vraddhn_s16(transmute(a), transmute(b))) }
+pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtun.v4i16"
+        )]
+        fn _vqmovun_s32(a: int32x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqmovun_s32(a) }
 }
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(sqxtun)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37353,18 +38366,26 @@ pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
-    unsafe { transmute(vraddhn_s32(transmute(a), transmute(b))) }
+pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtun.v2i32"
+        )]
+        fn _vqmovun_s64(a: int64x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqmovun_s64(a) }
 }
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(sqneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37374,78 +38395,84 @@ pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
-    unsafe { transmute(vraddhn_s64(transmute(a), transmute(b))) }
+pub fn vqneg_s8(a: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")]
+        fn _vqneg_s8(a: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vqneg_s8(a) }
 }
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecpe)
+    assert_instr(sqneg)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t {
+pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v4f16"
+            link_name = "llvm.aarch64.neon.sqneg.v16i8"
         )]
-        fn _vrecpe_f16(a: float16x4_t) -> float16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")]
+        fn _vqnegq_s8(a: int8x16_t) -> int8x16_t;
     }
-    unsafe { _vrecpe_f16(a) }
+    unsafe { _vqnegq_s8(a) }
 }
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecpe)
+    assert_instr(sqneg)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t {
+pub fn vqneg_s16(a: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v8f16"
+            link_name = "llvm.aarch64.neon.sqneg.v4i16"
         )]
-        fn _vrecpeq_f16(a: float16x8_t) -> float16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")]
+        fn _vqneg_s16(a: int16x4_t) -> int16x4_t;
     }
-    unsafe { _vrecpeq_f16(a) }
+    unsafe { _vqneg_s16(a) }
 }
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecpe)
+    assert_instr(sqneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37455,26 +38482,26 @@ pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
+pub fn vqnegq_s16(a: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v2f32"
+            link_name = "llvm.aarch64.neon.sqneg.v8i16"
         )]
-        fn _vrecpe_f32(a: float32x2_t) -> float32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")]
+        fn _vqnegq_s16(a: int16x8_t) -> int16x8_t;
     }
-    unsafe { _vrecpe_f32(a) }
+    unsafe { _vqnegq_s16(a) }
 }
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecpe)
+    assert_instr(sqneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37484,26 +38511,26 @@ pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
+pub fn vqneg_s32(a: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v4f32"
+            link_name = "llvm.aarch64.neon.sqneg.v2i32"
         )]
-        fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")]
+        fn _vqneg_s32(a: int32x2_t) -> int32x2_t;
     }
-    unsafe { _vrecpeq_f32(a) }
+    unsafe { _vqneg_s32(a) }
 }
-#[doc = "Unsigned reciprocal estimate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urecpe)
+    assert_instr(sqneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37513,27 +38540,28 @@ pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t {
+pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urecpe.v2i32"
+            link_name = "llvm.aarch64.neon.sqneg.v4i32"
         )]
-        fn _vrecpe_u32(a: uint32x2_t) -> uint32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")]
+        fn _vqnegq_s32(a: int32x4_t) -> int32x4_t;
     }
-    unsafe { _vrecpe_u32(a) }
+    unsafe { _vqnegq_s32(a) }
 }
-#[doc = "Unsigned reciprocal estimate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urecpe)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37542,87 +38570,70 @@ pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urecpe.v4i32"
-        )]
-        fn _vrecpeq_u32(a: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vrecpeq_u32(a) }
+pub fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b = vdup_lane_s16::<LANE>(b);
+    vqrdmulh_s16(a, b)
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecps)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v4f16"
-        )]
-        fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vrecps_f16(a, b) }
+pub fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let b = vdup_lane_s32::<LANE>(b);
+    vqrdmulh_s32(a, b)
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecps)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v8f16"
-        )]
-        fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
-    }
-    unsafe { _vrecpsq_f16(a, b) }
+pub fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    let b = vdup_laneq_s16::<LANE>(b);
+    vqrdmulh_s16(a, b)
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecps)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37631,27 +38642,22 @@ pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v2f32"
-        )]
-        fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vrecps_f32(a, b) }
+pub fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b = vdup_laneq_s32::<LANE>(b);
+    vqrdmulh_s32(a, b)
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecps)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37660,1901 +38666,2582 @@ pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v4f32"
-        )]
-        fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vrecpsq_f32(a, b) }
+pub fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b = vdupq_lane_s16::<LANE>(b);
+    vqrdmulhq_s16(a, b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
-    unsafe { transmute(a) }
+pub fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let b = vdupq_lane_s32::<LANE>(b);
+    vqrdmulhq_s32(a, b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    let b = vdupq_laneq_s16::<LANE>(b);
+    vqrdmulhq_s16(a, b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
-    unsafe { transmute(a) }
+pub fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b = vdupq_laneq_s32::<LANE>(b);
+    vqrdmulhq_s32(a, b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"]
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    vqrdmulh_s16(a, vdup_n_s16(b))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
-    unsafe { transmute(a) }
+pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    vqrdmulhq_s16(a, vdupq_n_s16(b))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    vqrdmulh_s32(a, vdup_n_s32(b))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
-    unsafe { transmute(a) }
+pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    vqrdmulhq_s32(a, vdupq_n_s32(b))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrdmulh.v4i16"
+        )]
+        fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
+    unsafe { _vqrdmulh_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
-    unsafe { transmute(a) }
+pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrdmulh.v8i16"
+        )]
+        fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    unsafe { _vqrdmulhq_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrdmulh.v2i32"
+        )]
+        fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
+    unsafe { _vqrdmulh_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
-    unsafe { transmute(a) }
+pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrdmulh.v4i32"
+        )]
+        fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    unsafe { _vqrdmulhq_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v8i8"
+        )]
+        fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
+    unsafe { _vqrshl_s8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
-    unsafe { transmute(a) }
+pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v16i8"
+        )]
+        fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    unsafe { _vqrshlq_s8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v4i16"
+        )]
+        fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
+    unsafe { _vqrshl_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
-    unsafe { transmute(a) }
+pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v8i16"
+        )]
+        fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    unsafe { _vqrshlq_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v2i32"
+        )]
+        fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
+    unsafe { _vqrshl_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
-    unsafe { transmute(a) }
+pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v4i32"
+        )]
+        fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    unsafe { _vqrshlq_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v1i64"
+        )]
+        fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
     }
+    unsafe { _vqrshl_s64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
-    unsafe { transmute(a) }
+pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v2i64"
+        )]
+        fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    unsafe { _vqrshlq_s64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v8i8"
+        )]
+        fn _vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
     }
+    unsafe { _vqrshl_u8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
-    unsafe { transmute(a) }
+pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v16i8"
+        )]
+        fn _vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+    }
+    unsafe { _vqrshlq_u8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v4i16"
+        )]
+        fn _vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
     }
+    unsafe { _vqrshl_u16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v8i16"
+        )]
+        fn _vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
     }
+    unsafe { _vqrshlq_u16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
-    unsafe { transmute(a) }
+pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v2i32"
+        )]
+        fn _vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqrshl_u32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v4i32"
+        )]
+        fn _vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
     }
+    unsafe { _vqrshlq_u32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
-    unsafe { transmute(a) }
+pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v1i64"
+        )]
+        fn _vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+    }
+    unsafe { _vqrshl_u64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v2i64"
+        )]
+        fn _vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
     }
+    unsafe { _vqrshlq_u64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")]
+        fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    unsafe { _vqrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
+        fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    unsafe { _vqrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) }
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
+        fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    unsafe { _vqrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrn.v8i8"
+        )]
+        fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    unsafe { _vqrshrn_n_s16(a, N) }
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrn.v4i16"
+        )]
+        fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    unsafe { _vqrshrn_n_s32(a, N) }
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrn.v2i32"
+        )]
+        fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    unsafe { _vqrshrn_n_s64(a, N) }
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
+        fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqrshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) }
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
+        fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqrshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) }
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
+        fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) }
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshrn.v8i8"
+        )]
+        fn _vqrshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t;
+    }
+    unsafe { _vqrshrn_n_u16(a, N) }
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshrn.v4i16"
+        )]
+        fn _vqrshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t;
+    }
+    unsafe { _vqrshrn_n_u32(a, N) }
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshrn.v2i32"
+        )]
+        fn _vqrshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t;
+    }
+    unsafe { _vqrshrn_n_u64(a, N) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")]
+        fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqrshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")]
+        fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqrshrun_n_s32(a, const { int32x4_t([-N; 4]) }) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")]
+        fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqrshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrun.v8i8"
+        )]
+        fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t;
+    }
+    unsafe { _vqrshrun_n_s16(a, N) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrun.v4i16"
+        )]
+        fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t;
+    }
+    unsafe { _vqrshrun_n_s32(a, N) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrun.v2i32"
+        )]
+        fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t;
+    }
+    unsafe { _vqrshrun_n_s64(a, N) }
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
-    unsafe { transmute(a) }
+pub fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshl_s8(a, vdup_n_s8(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshlq_s8(a, vdupq_n_s8(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
-    unsafe { transmute(a) }
+pub fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshl_s16(a, vdup_n_s16(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshlq_s16(a, vdupq_n_s16(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshl_s32(a, vdup_n_s32(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshlq_s32(a, vdupq_n_s32(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
-    unsafe { transmute(a) }
+pub fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshl_s64(a, vdup_n_s64(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshlq_s64(a, vdupq_n_s64(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshl_u8(a, vdup_n_s8(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshlq_u8(a, vdupq_n_s8(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
-    unsafe { transmute(a) }
+pub fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshl_u16(a, vdup_n_s16(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshlq_u16(a, vdupq_n_s16(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshl_u32(a, vdup_n_s32(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshlq_u32(a, vdupq_n_s32(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
-    unsafe { transmute(a) }
+pub fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshl_u64(a, vdup_n_s64(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshlq_u64(a, vdupq_n_s64(N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v8i8"
+        )]
+        fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vqshl_s8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v16i8"
+        )]
+        fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
+    unsafe { _vqshlq_s8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
-    unsafe { transmute(a) }
+pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v4i16"
+        )]
+        fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vqshl_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v8i16"
+        )]
+        fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
+    unsafe { _vqshlq_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v2i32"
+        )]
+        fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vqshl_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v4i32"
+        )]
+        fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
+    unsafe { _vqshlq_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
-    unsafe { transmute(a) }
+pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v1i64"
+        )]
+        fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    unsafe { _vqshl_s64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v2i64"
+        )]
+        fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
     }
+    unsafe { _vqshlq_s64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v8i8"
+        )]
+        fn _vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqshl_u8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v16i8"
+        )]
+        fn _vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
     }
+    unsafe { _vqshlq_u8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v4i16"
+        )]
+        fn _vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
     }
+    unsafe { _vqshl_u16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v8i16"
+        )]
+        fn _vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+    }
+    unsafe { _vqshlq_u16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v2i32"
+        )]
+        fn _vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
     }
+    unsafe { _vqshl_u32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
-    unsafe { transmute(a) }
+pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v4i32"
+        )]
+        fn _vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vqshlq_u32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
-    unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v1i64"
+        )]
+        fn _vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
     }
+    unsafe { _vqshl_u64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v2i64"
+        )]
+        fn _vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+    }
+    unsafe { _vqshlq_u64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")]
+        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
     }
+    unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
-    unsafe { transmute(a) }
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")]
+        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
+    }
+    unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
-    unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")]
+        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
     }
+    unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
-    unsafe { transmute(a) }
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")]
+        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
+    }
+    unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
-    unsafe { transmute(a) }
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")]
+        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
-    unsafe { transmute(a) }
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")]
+        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
-    unsafe { transmute(a) }
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")]
+        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
+    }
+    unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
-    unsafe { transmute(a) }
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")]
+        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
+    }
+    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v8i8"
+        )]
+        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v16i8"
+        )]
+        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
+    }
+    unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v4i16"
+        )]
+        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v8i16"
+        )]
+        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
+    }
+    unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v2i32"
+        )]
+        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v4i32"
+        )]
+        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v1i64"
+        )]
+        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
+    }
+    unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v2i64"
+        )]
+        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
+    }
+    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) }
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
+        fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    unsafe { _vqshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")]
+        fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    unsafe { _vqshrn_n_s32(a, const { int32x4_t([-N; 4]) }) }
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")]
+        fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    unsafe { _vqshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrn.v8i8"
+        )]
+        fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    unsafe { _vqshrn_n_s16(a, N) }
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrn.v4i16"
+        )]
+        fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    unsafe { _vqshrn_n_s32(a, N) }
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrn.v2i32"
+        )]
+        fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    unsafe { _vqshrn_n_s64(a, N) }
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
+        fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) }
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
+        fn _vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) }
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
+        fn _vqshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) }
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshrn.v8i8"
+        )]
+        fn _vqshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t;
+    }
+    unsafe { _vqshrn_n_u16(a, N) }
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshrn.v4i16"
+        )]
+        fn _vqshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t;
+    }
+    unsafe { _vqshrn_n_u32(a, N) }
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshrn.v2i32"
+        )]
+        fn _vqshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t;
+    }
+    unsafe { _vqshrn_n_u64(a, N) }
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
+        fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
+        fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqshrun_n_s32(a, const { int32x4_t([-N; 4]) }) }
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
+        fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrun.v8i8"
+        )]
+        fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t;
+    }
+    unsafe { _vqshrun_n_s16(a, N) }
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrun.v4i16"
+        )]
+        fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t;
+    }
+    unsafe { _vqshrun_n_s32(a, N) }
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrun.v2i32"
+        )]
+        fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t;
+    }
+    unsafe { _vqshrun_n_s64(a, N) }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
-    unsafe { transmute(a) }
+pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
-    unsafe { transmute(a) }
+pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39564,18 +41251,18 @@ pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
-    unsafe { transmute(a) }
+pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39585,18 +41272,18 @@ pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
-    unsafe { transmute(a) }
+pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39606,18 +41293,18 @@ pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
-    unsafe { transmute(a) }
+pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39627,18 +41314,18 @@ pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    unsafe { transmute(a) }
+pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39648,18 +41335,18 @@ pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
-    unsafe { transmute(a) }
+pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39669,18 +41356,18 @@ pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
-    unsafe { transmute(a) }
+pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39690,18 +41377,18 @@ pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
-    unsafe { transmute(a) }
+pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39711,18 +41398,18 @@ pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
-    unsafe { transmute(a) }
+pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39732,18 +41419,18 @@ pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
-    unsafe { transmute(a) }
+pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39753,18 +41440,18 @@ pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
-    unsafe { transmute(a) }
+pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39774,18 +41461,18 @@ pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
-    unsafe { transmute(a) }
+pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39795,18 +41482,22 @@ pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39816,18 +41507,23 @@ pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    let x = vraddhn_s16(b, c);
+    vcombine_s8(a, x)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39837,18 +41533,23 @@ pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    let x = vraddhn_s32(b, c);
+    vcombine_s16(a, x)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39858,18 +41559,23 @@ pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    let x = vraddhn_s64(b, c);
+    vcombine_s32(a, x)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39879,18 +41585,25 @@ pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+    unsafe {
+        let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c)));
+        vcombine_u8(a, x)
+    }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39900,18 +41613,25 @@ pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+    unsafe {
+        let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c)));
+        vcombine_u16(a, x)
+    }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39921,18 +41641,21 @@ pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+    unsafe {
+        let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c)));
+        vcombine_u32(a, x)
+    }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39942,18 +41665,26 @@ pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.raddhn.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")]
+        fn _vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t;
+    }
+    unsafe { _vraddhn_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39963,18 +41694,26 @@ pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.raddhn.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")]
+        fn _vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t;
+    }
+    unsafe { _vraddhn_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39984,18 +41723,26 @@ pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.raddhn.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")]
+        fn _vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t;
+    }
+    unsafe { _vraddhn_s64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -40005,18 +41752,18 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    unsafe { transmute(vraddhn_s16(transmute(a), transmute(b))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -40026,18 +41773,18 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    unsafe { transmute(vraddhn_s32(transmute(a), transmute(b))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -40047,60 +41794,78 @@ pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
-    unsafe { transmute(a) }
+pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    unsafe { transmute(vraddhn_s64(transmute(a), transmute(b))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecpe)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v4f16"
+        )]
+        fn _vrecpe_f16(a: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vrecpe_f16(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecpe)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v8f16"
+        )]
+        fn _vrecpeq_f16(a: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vrecpeq_f16(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecpe)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -40110,18 +41875,26 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
-    unsafe { transmute(a) }
+pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v2f32"
+        )]
+        fn _vrecpe_f32(a: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vrecpe_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecpe)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -40131,18 +41904,26 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
-    unsafe { transmute(a) }
+pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v4f32"
+        )]
+        fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vrecpeq_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
+#[doc = "Unsigned reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urecpe)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -40152,18 +41933,26 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
-    unsafe { transmute(a) }
+pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urecpe.v2i32"
+        )]
+        fn _vrecpe_u32(a: uint32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vrecpe_u32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
+#[doc = "Unsigned reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urecpe)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -40173,60 +41962,86 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
-    unsafe { transmute(a) }
+pub fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urecpe.v4i32"
+        )]
+        fn _vrecpeq_u32(a: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vrecpeq_u32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecps)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v4f16"
+        )]
+        fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vrecps_f16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecps)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v8f16"
+        )]
+        fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vrecpsq_f16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecps)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -40236,18 +42051,26 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
-    unsafe { transmute(a) }
+pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v2f32"
+        )]
+        fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vrecps_f32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecps)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -40257,11 +42080,19 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
-    unsafe { transmute(a) }
+pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v4f32"
+        )]
+        fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vrecpsq_f32(a, b) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -40272,17 +42103,18 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -40293,17 +42125,18 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -40314,17 +42147,18 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -40335,17 +42169,18 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -40356,17 +42191,18 @@ pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -40377,21 +42213,21 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40406,15 +42242,14 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
+pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40429,18 +42264,14 @@ pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40455,15 +42286,14 @@ pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
+pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40478,18 +42308,14 @@ pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
-    unsafe {
-        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40504,15 +42330,14 @@ pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
+pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40527,18 +42352,14 @@ pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40553,15 +42374,14 @@ pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
+pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40576,19 +42396,14 @@ pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
-    unsafe {
-        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40603,15 +42418,14 @@ pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
+pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40626,18 +42440,14 @@ pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40652,15 +42462,14 @@ pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
+pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -40675,17 +42484,12 @@ pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40695,19 +42499,19 @@ pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40717,22 +42521,19 @@ pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40742,19 +42543,19 @@ pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40764,23 +42565,19 @@ pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40790,19 +42587,19 @@ pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40812,23 +42609,19 @@ pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40838,19 +42631,19 @@ pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40860,22 +42653,19 @@ pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40885,19 +42675,19 @@ pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40907,23 +42697,19 @@ pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40933,19 +42719,19 @@ pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40955,23 +42741,19 @@ pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -40981,19 +42763,19 @@ pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41003,22 +42785,19 @@ pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41028,19 +42807,19 @@ pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41050,23 +42829,19 @@ pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41076,19 +42851,19 @@ pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41098,23 +42873,19 @@ pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
-    unsafe {
-        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41124,19 +42895,19 @@ pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41146,22 +42917,19 @@ pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41171,19 +42939,19 @@ pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41193,27 +42961,19 @@ pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41223,19 +42983,19 @@ pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41245,23 +43005,19 @@ pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41271,19 +43027,19 @@ pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41293,23 +43049,19 @@ pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41325,13 +43077,12 @@ pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
+pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41347,21 +43098,12 @@ pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41377,13 +43119,12 @@ pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
+pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41399,17 +43140,12 @@ pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
+pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    unsafe { transmute(a) }
+}
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41425,13 +43161,12 @@ pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
+pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41447,17 +43182,12 @@ pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41473,13 +43203,12 @@ pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
+pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41495,21 +43224,12 @@ pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41525,13 +43245,12 @@ pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
+pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41547,17 +43266,12 @@ pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
-    unsafe {
-        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41573,13 +43287,12 @@ pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
+pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41595,17 +43308,12 @@ pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41621,13 +43329,12 @@ pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
+pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41643,17 +43350,12 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41669,13 +43371,12 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
+pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41691,17 +43392,12 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41717,13 +43413,12 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
+pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41739,16 +43434,12 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41764,13 +43455,12 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
+pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41786,17 +43476,12 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41812,13 +43497,12 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
+pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41834,17 +43518,12 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41860,13 +43539,12 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
+pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41882,16 +43560,12 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41907,13 +43581,12 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
+pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41929,17 +43602,12 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41955,13 +43623,12 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
+pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -41977,16 +43644,12 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42002,13 +43665,12 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
+pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42024,17 +43686,12 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42050,13 +43707,12 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
+pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42072,18 +43728,12 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42099,13 +43749,12 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
+pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42121,18 +43770,12 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42148,13 +43791,12 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
+pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42170,18 +43812,12 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42197,13 +43833,12 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
+pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42219,18 +43854,12 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42246,13 +43875,12 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
+pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42268,20 +43896,14 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -42289,21 +43911,21 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -42311,26 +43933,21 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -42338,21 +43955,21 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -42360,26 +43977,21 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -42387,21 +43999,21 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -42409,24 +44021,19 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42442,13 +44049,12 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
+pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42464,18 +44070,12 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
-    unsafe {
-        let a: int8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42491,13 +44091,12 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42513,17 +44112,12 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42539,13 +44133,12 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
+pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42561,17 +44154,12 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42587,13 +44175,12 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
+pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42609,17 +44196,12 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42635,13 +44217,12 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
+pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42657,16 +44238,12 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42682,13 +44259,12 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
+pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42704,17 +44280,12 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42730,13 +44301,12 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
+pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42752,17 +44322,12 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42778,13 +44343,12 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
+pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42800,16 +44364,12 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42825,13 +44385,12 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
+pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42847,17 +44406,12 @@ pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42873,13 +44427,12 @@ pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
+pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42895,16 +44448,12 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42920,13 +44469,12 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
+pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42942,16 +44490,12 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42967,13 +44511,12 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -42989,17 +44532,12 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43015,13 +44553,12 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
+pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43037,21 +44574,12 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43067,13 +44595,12 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
+pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43089,17 +44616,12 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43115,13 +44637,12 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
+pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43137,17 +44658,12 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43163,13 +44679,12 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
+pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43185,21 +44700,12 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43215,13 +44721,12 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
+pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43237,17 +44742,12 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43263,13 +44763,12 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
+pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43285,17 +44784,12 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43311,13 +44805,12 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
+pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43333,21 +44826,12 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43363,13 +44847,12 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
+pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43385,17 +44868,12 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
-    unsafe {
-        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43411,13 +44889,12 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
+pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43433,17 +44910,12 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43459,13 +44931,12 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
+pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43481,17 +44952,12 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43507,13 +44973,12 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
+pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43529,16 +44994,12 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43554,13 +45015,12 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
+pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43576,17 +45036,12 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43602,13 +45057,12 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
+pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43624,17 +45078,12 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43650,13 +45099,12 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
+pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43672,16 +45120,12 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43697,13 +45141,12 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
+pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43719,17 +45162,12 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43745,13 +45183,12 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
+pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43767,17 +45204,12 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43793,13 +45225,12 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
+pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43815,16 +45246,12 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43840,13 +45267,12 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
+pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43862,16 +45288,12 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43887,13 +45309,12 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
+pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43909,21 +45330,12 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43939,13 +45351,12 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
+pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43961,17 +45372,12 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -43987,13 +45393,12 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
+pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44009,17 +45414,12 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44035,13 +45435,12 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
+pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44057,21 +45456,12 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44087,13 +45477,12 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
+pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44109,17 +45498,12 @@ pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44135,13 +45519,12 @@ pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44158,16 +45541,11 @@ pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44187,9 +45565,8 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44205,21 +45582,12 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44235,13 +45603,12 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
+pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44257,17 +45624,12 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44283,13 +45645,12 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
+pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44305,17 +45666,12 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
-    unsafe {
-        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44331,13 +45687,12 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44353,16 +45708,12 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44378,13 +45729,12 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
+pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44400,16 +45750,12 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44425,13 +45771,12 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
+pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44447,16 +45792,12 @@ pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44472,13 +45813,12 @@ pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
+pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44494,16 +45834,12 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44519,13 +45855,12 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
+pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44541,16 +45876,12 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44566,13 +45897,12 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
+pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44588,16 +45918,12 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44613,13 +45939,12 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
+pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44635,16 +45960,12 @@ pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44660,13 +45981,12 @@ pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44682,16 +46002,12 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44707,13 +46023,12 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44729,16 +46044,12 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44754,13 +46065,12 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
+pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44776,16 +46086,12 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44801,13 +46107,12 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44823,17 +46128,12 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44849,13 +46149,12 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44871,21 +46170,12 @@ pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44901,13 +46191,12 @@ pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
+pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44923,17 +46212,12 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44949,13 +46233,12 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
+pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44971,17 +46254,12 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44997,13 +46275,12 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
+pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45019,21 +46296,12 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45049,13 +46317,12 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
+pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45071,17 +46338,12 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45097,13 +46359,12 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
+pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45119,17 +46380,12 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45145,13 +46401,12 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
+pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45167,21 +46422,12 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45197,13 +46443,12 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45219,17 +46464,12 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
-    unsafe {
-        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45245,13 +46485,12 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45267,17 +46506,12 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45293,13 +46527,12 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
+pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45315,17 +46548,12 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45341,13 +46569,12 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45363,17 +46590,12 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45389,13 +46611,12 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
+pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45411,16 +46632,12 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45436,13 +46653,12 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45458,17 +46674,12 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45484,13 +46695,12 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45506,17 +46716,12 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45532,13 +46737,12 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
+pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45554,16 +46758,12 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45579,13 +46779,12 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45601,17 +46800,12 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45627,13 +46821,12 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
+pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45649,16 +46842,12 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45674,13 +46863,12 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
+pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45696,17 +46884,12 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45722,13 +46905,12 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
+pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45744,18 +46926,12 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45771,13 +46947,12 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
+pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45793,18 +46968,12 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45820,13 +46989,12 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
+pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45842,18 +47010,12 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45869,13 +47031,12 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
+pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45891,18 +47052,12 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45918,13 +47073,12 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
+pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45940,18 +47094,12 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45967,13 +47115,12 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
+pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45989,18 +47136,12 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46016,13 +47157,12 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
+pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46038,18 +47178,12 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46065,13 +47199,12 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
+pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46087,18 +47220,12 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46114,13 +47241,12 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
+pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46136,18 +47262,12 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
-    unsafe {
-        let a: uint8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46163,13 +47283,12 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
+pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46185,17 +47304,12 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46211,13 +47325,12 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
+pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46233,17 +47346,12 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46259,13 +47367,12 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
+pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46281,17 +47388,12 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46307,13 +47409,12 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
+pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46329,16 +47430,12 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46354,13 +47451,12 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
+pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46376,17 +47472,12 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46402,13 +47493,12 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
+pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46424,17 +47514,12 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46450,13 +47535,12 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46472,16 +47556,12 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46497,13 +47577,12 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
+pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46519,17 +47598,12 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46545,13 +47619,12 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
+pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46567,16 +47640,12 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46592,13 +47661,12 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
+pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46614,16 +47682,12 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46639,13 +47703,12 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
+pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46661,17 +47724,12 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46687,13 +47745,12 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
+pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46709,21 +47766,12 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46739,13 +47787,12 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
+pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46761,17 +47808,12 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46787,13 +47829,12 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
+pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46809,17 +47850,12 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46835,13 +47871,12 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
+pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46857,21 +47892,12 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46887,13 +47913,12 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
+pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46909,17 +47934,12 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46935,13 +47955,12 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
+pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46957,17 +47976,12 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46983,13 +47997,12 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
+pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47005,21 +48018,12 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47035,13 +48039,12 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
+pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47057,17 +48060,12 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
-    unsafe {
-        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47083,13 +48081,12 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
+pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47105,17 +48102,12 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47131,13 +48123,12 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
+pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47153,17 +48144,12 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47179,13 +48165,12 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
+pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47201,16 +48186,12 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47226,13 +48207,12 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47248,17 +48228,12 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47274,13 +48249,12 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
+pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47296,17 +48270,12 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47322,13 +48291,12 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47344,16 +48312,12 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47369,13 +48333,12 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
+pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47391,17 +48354,12 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47417,13 +48375,12 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47439,17 +48396,12 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47465,13 +48417,12 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
+pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47487,16 +48438,12 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47512,13 +48459,12 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
+pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47534,16 +48480,12 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
-    }
+pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47559,13 +48501,12 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
+pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47581,21 +48522,12 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47611,13 +48543,12 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
+pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47633,17 +48564,12 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47659,13 +48585,12 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
+pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47681,17 +48606,12 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47707,13 +48627,12 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47729,21 +48648,12 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47759,13 +48669,12 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
+pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47781,17 +48690,12 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47807,13 +48711,12 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -47829,19 +48732,14 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -47855,15 +48753,14 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -47877,23 +48774,14 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -47907,15 +48795,14 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -47929,19 +48816,14 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -47955,15 +48837,14 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
+pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -47977,19 +48858,14 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
-    unsafe {
-        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -48003,15 +48879,14 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -48025,18 +48900,14 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -48050,15 +48921,14 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -48072,18 +48942,14 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -48097,15 +48963,14 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -48119,22 +48984,19 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
+    unsafe { transmute(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48144,19 +49006,19 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
-    unsafe { transmute(a) }
+pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48166,22 +49028,23 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t {
     unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48191,19 +49054,19 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
-    unsafe { transmute(a) }
+pub fn vrev16_s8(a: int8x8_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48213,22 +49076,23 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+pub fn vrev16_s8(a: int8x8_t) -> int8x8_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48238,19 +49102,19 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
-    unsafe { transmute(a) }
+pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48260,22 +49124,23 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
+pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48285,19 +49150,19 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
-    unsafe { transmute(a) }
+pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48307,22 +49172,29 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t =
+            simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48332,19 +49204,19 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
-    unsafe { transmute(a) }
+pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48354,22 +49226,29 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
+pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t =
+            simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48379,19 +49258,19 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
-    unsafe { transmute(a) }
+pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48401,22 +49280,29 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t =
+            simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48426,19 +49312,19 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
-    unsafe { transmute(a) }
+pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48448,22 +49334,23 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
+pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        transmute(a)
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48473,19 +49360,19 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
-    unsafe { transmute(a) }
+pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48495,23 +49382,23 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48521,19 +49408,19 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
-    unsafe { transmute(a) }
+pub fn vrev32_s16(a: int16x4_t) -> int16x4_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48543,27 +49430,23 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+pub fn vrev32_s16(a: int16x4_t) -> int16x4_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48573,19 +49456,19 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
-    unsafe { transmute(a) }
+pub fn vrev32_s8(a: int8x8_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48595,23 +49478,23 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
+pub fn vrev32_s8(a: int8x8_t) -> int8x8_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x8_t = transmute(a);
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48621,19 +49504,19 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
-    unsafe { transmute(a) }
+pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48643,23 +49526,23 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
+pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int32x4_t = transmute(a);
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48669,19 +49552,19 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
-    unsafe { transmute(a) }
+pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48691,27 +49574,23 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48721,19 +49600,19 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
-    unsafe { transmute(a) }
+pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48743,23 +49622,23 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
+pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48769,19 +49648,19 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
-    unsafe { transmute(a) }
+pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48791,23 +49670,29 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t =
+            simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48817,19 +49702,19 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
-    unsafe { transmute(a) }
+pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48839,27 +49724,23 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
+pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48869,19 +49750,19 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48891,23 +49772,29 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t {
     unsafe {
-        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t =
+            simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48917,19 +49804,19 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
-    unsafe { transmute(a) }
+pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48939,23 +49826,23 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
+pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48965,19 +49852,19 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
-    unsafe { transmute(a) }
+pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -48987,23 +49874,29 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
+pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t =
+            simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49013,19 +49906,19 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
-    unsafe { transmute(a) }
+pub fn vrev64_f32(a: float32x2_t) -> float32x2_t {
+    unsafe { simd_shuffle!(a, a, [1, 0]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49035,23 +49928,23 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+pub fn vrev64_f32(a: float32x2_t) -> float32x2_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float32x2_t = simd_shuffle!(a, a, [1, 0]);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49061,19 +49954,19 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
-    unsafe { transmute(a) }
+pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49083,22 +49976,23 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
+pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49108,19 +50002,19 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
-    unsafe { transmute(a) }
+pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t {
+    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49130,23 +50024,23 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
+pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t {
     unsafe {
         let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let ret_val: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49156,19 +50050,19 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
-    unsafe { transmute(a) }
+pub fn vrev64_s16(a: int16x4_t) -> int16x4_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49178,23 +50072,23 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+pub fn vrev64_s16(a: int16x4_t) -> int16x4_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49204,19 +50098,19 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
-    unsafe { transmute(a) }
+pub fn vrev64_s32(a: int32x2_t) -> int32x2_t {
+    unsafe { simd_shuffle!(a, a, [1, 0]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49226,22 +50120,23 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
+pub fn vrev64_s32(a: int32x2_t) -> int32x2_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49251,19 +50146,19 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
-    unsafe { transmute(a) }
+pub fn vrev64_s8(a: int8x8_t) -> int8x8_t {
+    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49273,23 +50168,23 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
+pub fn vrev64_s8(a: int8x8_t) -> int8x8_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49299,19 +50194,19 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
-    unsafe { transmute(a) }
+pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49321,22 +50216,23 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
+pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t {
     unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49346,19 +50242,19 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
-    unsafe { transmute(a) }
+pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_shuffle!(a, a, [1, 0]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49368,23 +50264,23 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
+pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49394,19 +50290,19 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
-    unsafe { transmute(a) }
+pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49416,24 +50312,23 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
+pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49443,19 +50338,19 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
-    unsafe { transmute(a) }
+pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49465,24 +50360,23 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
+pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49492,19 +50386,19 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
-    unsafe { transmute(a) }
+pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49514,24 +50408,23 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49541,19 +50434,19 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
-    unsafe { transmute(a) }
+pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t {
+    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49563,24 +50456,29 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
+pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t {
     unsafe {
         let a: poly8x16_t =
             simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: poly8x16_t =
+            simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49590,19 +50488,19 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
-    unsafe { transmute(a) }
+pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49612,24 +50510,23 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
+pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49639,19 +50536,19 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
-    unsafe { transmute(a) }
+pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49661,24 +50558,23 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49688,19 +50584,19 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
-    unsafe { transmute(a) }
+pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t {
+    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49710,24 +50606,29 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
+pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t {
     unsafe {
-        let a: poly8x16_t =
+        let a: int8x16_t =
             simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: int8x16_t =
+            simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49737,19 +50638,19 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49759,24 +50660,23 @@ pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
+pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49786,19 +50686,19 @@ pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
-    unsafe { transmute(a) }
+pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49808,24 +50708,23 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
+pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t {
     unsafe {
-        let a: poly8x16_t =
-            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = simd_shuffle!(a, a, [1, 0, 3, 2]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49835,19 +50734,19 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
-    unsafe { transmute(a) }
+pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
+#[doc = "Reversing vector elements (swap endianness)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49857,119 +50756,128 @@ pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t =
+            simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
+#[doc = "Reverse elements in 64-bit doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrev64_f16(a: float16x4_t) -> float16x4_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
+#[doc = "Reverse elements in 64-bit doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrev64_f16(a: float16x4_t) -> float16x4_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
+#[doc = "Reverse elements in 64-bit doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t {
+    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
+#[doc = "Reverse elements in 64-bit doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev64)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t {
     unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -49979,19 +50887,26 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
-    unsafe { transmute(a) }
+pub fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srhadd.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")]
+        fn _vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vrhadd_s8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50001,22 +50916,26 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
-    unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+pub fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srhadd.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")]
+        fn _vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
+    unsafe { _vrhaddq_s8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50026,19 +50945,26 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
-    unsafe { transmute(a) }
+pub fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srhadd.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")]
+        fn _vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vrhadd_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50048,23 +50974,26 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
-    unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+pub fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srhadd.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")]
+        fn _vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
+    unsafe { _vrhaddq_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50074,19 +51003,26 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
-    unsafe { transmute(a) }
+pub fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srhadd.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")]
+        fn _vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vrhadd_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50096,23 +51032,26 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
-    unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+pub fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srhadd.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")]
+        fn _vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
+    unsafe { _vrhaddq_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50122,19 +51061,26 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
-    unsafe { transmute(a) }
+pub fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")]
+        fn _vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+    }
+    unsafe { _vrhadd_u8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50144,22 +51090,26 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
-    unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+pub fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")]
+        fn _vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
     }
+    unsafe { _vrhaddq_u8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50169,19 +51119,26 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
-    unsafe { transmute(a) }
+pub fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")]
+        fn _vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+    }
+    unsafe { _vrhadd_u16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50191,23 +51148,26 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
-    unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+pub fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")]
+        fn _vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
     }
+    unsafe { _vrhaddq_u16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50217,19 +51177,26 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
-    unsafe { transmute(a) }
+pub fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")]
+        fn _vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vrhadd_u32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50239,69 +51206,84 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
-    unsafe {
-        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
-        transmute(a)
+pub fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")]
+        fn _vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
     }
+    unsafe { _vrhaddq_u32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frintn)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrndn_f16(a: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
+            link_name = "llvm.roundeven.v4f16"
+        )]
+        fn _vrndn_f16(a: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vrndn_f16(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frintn)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        transmute(a)
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
+            link_name = "llvm.roundeven.v8f16"
+        )]
+        fn _vrndnq_f16(a: float16x8_t) -> float16x8_t;
     }
+    unsafe { _vrndnq_f16(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frintn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50311,19 +51293,25 @@ pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
-    unsafe { transmute(a) }
+pub fn vrndn_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
+            link_name = "llvm.roundeven.v2f32"
+        )]
+        fn _vrndn_f32(a: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vrndn_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frintn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50333,23 +51321,25 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
+            link_name = "llvm.roundeven.v4f32"
+        )]
+        fn _vrndnq_f32(a: float32x4_t) -> float32x4_t;
     }
+    unsafe { _vrndnq_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50359,19 +51349,26 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
-    unsafe { transmute(a) }
+pub fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v8i8"
+        )]
+        fn _vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vrshl_s8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50381,27 +51378,26 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+pub fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v16i8"
+        )]
+        fn _vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
+    unsafe { _vrshlq_s8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50411,19 +51407,26 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
-    unsafe { transmute(a) }
+pub fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v4i16"
+        )]
+        fn _vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vrshl_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50433,23 +51436,26 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v8i16"
+        )]
+        fn _vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
+    unsafe { _vrshlq_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50459,19 +51465,26 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
-    unsafe { transmute(a) }
+pub fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v2i32"
+        )]
+        fn _vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vrshl_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50481,23 +51494,26 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+pub fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v4i32"
+        )]
+        fn _vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
+    unsafe { _vrshlq_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50507,19 +51523,26 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
-    unsafe { transmute(a) }
+pub fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v1i64"
+        )]
+        fn _vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    unsafe { _vrshl_s64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50529,27 +51552,26 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+pub fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v2i64"
+        )]
+        fn _vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
     }
+    unsafe { _vrshlq_s64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50559,19 +51581,26 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
-    unsafe { transmute(a) }
+pub fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urshl.v8i8"
+        )]
+        fn _vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+    }
+    unsafe { _vrshl_u8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50581,23 +51610,26 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urshl.v16i8"
+        )]
+        fn _vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
     }
+    unsafe { _vrshlq_u8(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50607,19 +51639,26 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
-    unsafe { transmute(a) }
+pub fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urshl.v4i16"
+        )]
+        fn _vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+    }
+    unsafe { _vrshl_u16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50629,23 +51668,26 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+pub fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urshl.v8i16"
+        )]
+        fn _vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
     }
+    unsafe { _vrshlq_u16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50655,19 +51697,26 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
-    unsafe { transmute(a) }
+pub fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urshl.v2i32"
+        )]
+        fn _vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vrshl_u32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50677,27 +51726,26 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+pub fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urshl.v4i32"
+        )]
+        fn _vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
     }
+    unsafe { _vrshlq_u32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50707,19 +51755,26 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
-    unsafe { transmute(a) }
+pub fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urshl.v1i64"
+        )]
+        fn _vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+    }
+    unsafe { _vrshl_u64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -50729,24 +51784,28 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
-    unsafe {
-        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+pub fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urshl.v2i64"
+        )]
+        fn _vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
     }
+    unsafe { _vrshlq_u64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50755,20 +51814,21 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
-    unsafe { transmute(a) }
+pub fn vrshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    vrshl_s8(a, vdup_n_s8(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50777,23 +51837,21 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vrshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    vrshlq_s8(a, vdupq_n_s8(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50802,20 +51860,21 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
-    unsafe { transmute(a) }
+pub fn vrshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    vrshl_s16(a, vdup_n_s16(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50824,23 +51883,21 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vrshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    vrshlq_s16(a, vdupq_n_s16(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50849,20 +51906,21 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
-    unsafe { transmute(a) }
+pub fn vrshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    vrshl_s32(a, vdup_n_s32(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50871,23 +51929,21 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vrshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    vrshlq_s32(a, vdupq_n_s32(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50896,20 +51952,21 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
-    unsafe { transmute(a) }
+pub fn vrshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    vrshl_s64(a, vdup_n_s64(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50918,23 +51975,21 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vrshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    vrshlq_s64(a, vdupq_n_s64(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50943,20 +51998,21 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
-    unsafe { transmute(a) }
+pub fn vrshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    vrshl_u8(a, vdup_n_s8(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50965,23 +52021,21 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vrshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    vrshlq_u8(a, vdupq_n_s8(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -50990,20 +52044,21 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
-    unsafe { transmute(a) }
+pub fn vrshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    vrshl_u16(a, vdup_n_s16(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51012,23 +52067,21 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vrshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    vrshlq_u16(a, vdupq_n_s16(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51037,20 +52090,21 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
-    unsafe { transmute(a) }
+pub fn vrshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    vrshl_u32(a, vdup_n_s32(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51059,23 +52113,21 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vrshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    vrshlq_u32(a, vdupq_n_s32(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51084,20 +52136,21 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
-    unsafe { transmute(a) }
+pub fn vrshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    vrshl_u64(a, vdup_n_s64(-N as _))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51106,23 +52159,126 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vrshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    vrshlq_u64(a, vdupq_n_s64(-N as _))
+}
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")]
+        fn _vrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
     }
+    unsafe { _vrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")]
+        fn _vrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    unsafe { _vrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) }
+}
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")]
+        fn _vrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    unsafe { _vrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
+}
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rshrn.v8i8"
+        )]
+        fn _vrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    unsafe { _vrshrn_n_s16(a, N) }
+}
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rshrn.v4i16"
+        )]
+        fn _vrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    unsafe { _vrshrn_n_s32(a, N) }
+}
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rshrn.v2i32"
+        )]
+        fn _vrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    unsafe { _vrshrn_n_s64(a, N) }
+}
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rshrn, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51131,20 +52287,21 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
-    unsafe { transmute(a) }
+pub fn vrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { transmute(vrshrn_n_s16::<N>(transmute(a))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rshrn, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51153,28 +52310,21 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { transmute(vrshrn_n_s32::<N>(transmute(a))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rshrn, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51183,67 +52333,79 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
-    unsafe { transmute(a) }
+pub fn vrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { transmute(vrshrn_n_s64::<N>(transmute(a))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frsqrte)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.v4f16"
+        )]
+        fn _vrsqrte_f16(a: float16x4_t) -> float16x4_t;
     }
+    unsafe { _vrsqrte_f16(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frsqrte)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.v8f16"
+        )]
+        fn _vrsqrteq_f16(a: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vrsqrteq_f16(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frsqrte)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51253,23 +52415,26 @@ pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vrsqrte_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.v2f32"
+        )]
+        fn _vrsqrte_f32(a: float32x2_t) -> float32x2_t;
     }
+    unsafe { _vrsqrte_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frsqrte)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51279,19 +52444,26 @@ pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
-    unsafe { transmute(a) }
+pub fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.v4f32"
+        )]
+        fn _vrsqrteq_f32(a: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vrsqrteq_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
+#[doc = "Unsigned reciprocal square root estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ursqrte)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51301,27 +52473,26 @@ pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+pub fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ursqrte.v2i32"
+        )]
+        fn _vrsqrte_u32(a: uint32x2_t) -> uint32x2_t;
     }
+    unsafe { _vrsqrte_u32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
+#[doc = "Unsigned reciprocal square root estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ursqrte)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51331,67 +52502,86 @@ pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
-    unsafe { transmute(a) }
+pub fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ursqrte.v4i32"
+        )]
+        fn _vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vrsqrteq_u32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frsqrts)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrts.v4f16"
+        )]
+        fn _vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
     }
+    unsafe { _vrsqrts_f16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frsqrts)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrts.v8f16"
+        )]
+        fn _vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vrsqrtsq_f16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frsqrts)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51401,23 +52591,26 @@ pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrts.v2f32"
+        )]
+        fn _vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
+    unsafe { _vrsqrts_f32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frsqrts)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51427,20 +52620,28 @@ pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
+pub fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrts.v4f32"
+        )]
+        fn _vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vrsqrtsq_f32(a, b) }
+}
+#[doc = "Signed rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srsra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51449,28 +52650,21 @@ pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vrsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { simd_add(a, vrshr_n_s8::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
+#[doc = "Signed rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srsra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51479,20 +52673,21 @@ pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+pub fn vrsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { simd_add(a, vrshrq_n_s8::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
+#[doc = "Signed rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srsra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51501,23 +52696,21 @@ pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
-    unsafe {
-        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vrsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { simd_add(a, vrshr_n_s16::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
+#[doc = "Signed rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srsra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51526,19 +52719,21 @@ pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
-    unsafe { transmute(a) }
+pub fn vrsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { simd_add(a, vrshrq_n_s16::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
+#[doc = "Signed rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s32)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srsra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51547,19 +52742,21 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
-    unsafe { transmute(a) }
+pub fn vrsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { simd_add(a, vrshr_n_s32::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
+#[doc = "Signed rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s32)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srsra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51568,19 +52765,21 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
-    unsafe { transmute(a) }
+pub fn vrsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { simd_add(a, vrshrq_n_s32::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
+#[doc = "Signed rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srsra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51589,19 +52788,21 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
-    unsafe { transmute(a) }
+pub fn vrsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe { simd_add(a, vrshr_n_s64::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
+#[doc = "Signed rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(srsra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51610,19 +52811,21 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
-    unsafe { transmute(a) }
+pub fn vrsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe { simd_add(a, vrshrq_n_s64::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
+#[doc = "Unsigned rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u8)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ursra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51631,19 +52834,21 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
-    unsafe { transmute(a) }
+pub fn vrsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { simd_add(a, vrshr_n_u8::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
+#[doc = "Unsigned rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u8)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ursra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51652,19 +52857,21 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
-    unsafe { transmute(a) }
+pub fn vrsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { simd_add(a, vrshrq_n_u8::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
+#[doc = "Unsigned rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u16)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ursra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51673,19 +52880,21 @@ pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
-    unsafe { transmute(a) }
+pub fn vrsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { simd_add(a, vrshr_n_u16::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
+#[doc = "Unsigned rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u16)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ursra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51694,19 +52903,21 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
-    unsafe { transmute(a) }
+pub fn vrsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { simd_add(a, vrshrq_n_u16::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
+#[doc = "Unsigned rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u32)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ursra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51715,19 +52926,21 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
-    unsafe { transmute(a) }
+pub fn vrsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { simd_add(a, vrshr_n_u32::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
+#[doc = "Unsigned rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u32)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ursra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51736,19 +52949,21 @@ pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
-    unsafe { transmute(a) }
+pub fn vrsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { simd_add(a, vrshrq_n_u32::<N>(b)) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
+#[doc = "Unsigned rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u64)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ursra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51757,19 +52972,21 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
-    unsafe { transmute(a) }
+pub fn vrsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe { simd_add(a, vrshr_n_u64::<N>(b)) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"]
+#[doc = "Unsigned rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
+    assert_instr(ursra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51778,18 +52995,19 @@ pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
+pub fn vrsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe { simd_add(a, vrshrq_n_u64::<N>(b)) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_s8)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
+    assert_instr(rsubhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51799,18 +53017,26 @@ pub fn vrev16_p8(a: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev16_s8(a: int8x8_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
+pub fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rsubhn.v8i8"
+        )]
+        fn _vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t;
+    }
+    unsafe { _vrsubhn_s16(a, b) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_u8)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
+    assert_instr(rsubhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51820,18 +53046,26 @@ pub fn vrev16_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
+pub fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rsubhn.v4i16"
+        )]
+        fn _vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t;
+    }
+    unsafe { _vrsubhn_s32(a, b) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_p8)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
+    assert_instr(rsubhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51841,18 +53075,26 @@ pub fn vrev16_u8(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) }
+pub fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rsubhn.v2i32"
+        )]
+        fn _vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t;
+    }
+    unsafe { _vrsubhn_s64(a, b) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_s8)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
+    assert_instr(rsubhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51862,18 +53104,18 @@ pub fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) }
+pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    unsafe { transmute(vrsubhn_s16(transmute(a), transmute(b))) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16q_u8)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
+    assert_instr(rsubhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51883,18 +53125,18 @@ pub fn vrev16q_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) }
+pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    unsafe { transmute(vrsubhn_s32(transmute(a), transmute(b))) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p16)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(rsubhn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -51904,40 +53146,101 @@ pub fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32_p16(a: poly16x4_t) -> poly16x4_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
+pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    unsafe { transmute(vrsubhn_s64(transmute(a), transmute(b))) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_p8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
+#[cfg(target_endian = "little")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vset_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> float16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_insert!(b, LANE as u32, a) }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vset_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> float16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe {
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float16x4_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
 )]
-pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vsetq_lane_f16<const LANE: i32>(a: f16, b: float16x8_t) -> float16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vsetq_lane_f16<const LANE: i32>(a: f16, b: float16x8_t) -> float16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe {
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51946,19 +53249,22 @@ pub fn vrev32_p8(a: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32_s16(a: int16x4_t) -> int16x4_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
+pub fn vset_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_s8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51967,19 +53273,26 @@ pub fn vrev32_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32_s8(a: int8x8_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
+pub fn vset_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe {
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: float32x2_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -51988,19 +53301,22 @@ pub fn vrev32_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
+pub fn vsetq_lane_f32<const LANE: i32>(a: f32, b: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32_u8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52009,19 +53325,26 @@ pub fn vrev32_u16(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
+pub fn vsetq_lane_f32<const LANE: i32>(a: f32, b: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe {
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: float32x4_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52030,19 +53353,22 @@ pub fn vrev32_u8(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
+pub fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_p8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52051,19 +53377,26 @@ pub fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) }
+pub fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe {
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52072,19 +53405,22 @@ pub fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
+pub fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_s8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52093,19 +53429,31 @@ pub fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) }
+pub fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    unsafe {
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52114,19 +53462,22 @@ pub fn vrev32q_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) }
+pub fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev32q_u8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52135,19 +53486,26 @@ pub fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) }
+pub fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe {
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int16x4_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52156,19 +53514,22 @@ pub fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64_f32(a: float32x2_t) -> float32x2_t {
-    unsafe { simd_shuffle!(a, a, [1, 0]) }
+pub fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52177,19 +53538,26 @@ pub fn vrev64_f32(a: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }
+pub fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe {
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_p8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52198,19 +53566,22 @@ pub fn vrev64_p16(a: poly16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t {
-    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }
+pub fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52219,19 +53590,26 @@ pub fn vrev64_p8(a: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64_s16(a: int16x4_t) -> int16x4_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }
+pub fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe {
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int32x2_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52240,19 +53618,22 @@ pub fn vrev64_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64_s32(a: int32x2_t) -> int32x2_t {
-    unsafe { simd_shuffle!(a, a, [1, 0]) }
+pub fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_s8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52261,19 +53642,26 @@ pub fn vrev64_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64_s8(a: int8x8_t) -> int8x8_t {
-    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }
+pub fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe {
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: int32x4_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52282,19 +53670,22 @@ pub fn vrev64_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }
+pub fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52303,19 +53694,26 @@ pub fn vrev64_u16(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_shuffle!(a, a, [1, 0]) }
+pub fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe {
+        let b: int64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: int64x2_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_u8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52324,19 +53722,22 @@ pub fn vrev64_u32(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }
+pub fn vset_lane_u8<const LANE: i32>(a: u8, b: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52345,19 +53746,26 @@ pub fn vrev64_u8(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
+pub fn vset_lane_u8<const LANE: i32>(a: u8, b: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe {
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x8_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52366,19 +53774,22 @@ pub fn vrev64q_f32(a: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
+pub fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_p8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52387,19 +53798,31 @@ pub fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t {
-    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) }
+pub fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    unsafe {
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52408,19 +53831,22 @@ pub fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
+pub fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52429,19 +53855,26 @@ pub fn vrev64q_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
+pub fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe {
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint16x4_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_s8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52450,19 +53883,22 @@ pub fn vrev64q_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t {
-    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) }
+pub fn vsetq_lane_u16<const LANE: i32>(a: u16, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52471,19 +53907,26 @@ pub fn vrev64q_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
+pub fn vsetq_lane_u16<const LANE: i32>(a: u16, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe {
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52492,19 +53935,22 @@ pub fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, a, [1, 0, 3, 2]) }
+pub fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reversing vector elements (swap endianness)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_u8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52513,63 +53959,78 @@ pub fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) }
+pub fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe {
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint32x2_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
-#[doc = "Reverse elements in 64-bit doublewords"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64_f16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrev64_f16(a: float16x4_t) -> float16x4_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }
+pub fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Reverse elements in 64-bit doublewords"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev64q_f16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrev64))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
+    assert_instr(nop, LANE = 0)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t {
-    unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
+pub fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe {
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52578,27 +54039,22 @@ pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srhadd.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")]
-        fn _vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vrhadd_s8(a, b) }
+pub fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52607,27 +54063,26 @@ pub fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srhadd.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")]
-        fn _vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+pub fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe {
+        let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: uint64x2_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
-    unsafe { _vrhaddq_s8(a, b) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52636,27 +54091,22 @@ pub fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srhadd.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")]
-        fn _vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vrhadd_s16(a, b) }
+pub fn vset_lane_p8<const LANE: i32>(a: p8, b: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52665,27 +54115,26 @@ pub fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srhadd.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")]
-        fn _vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+pub fn vset_lane_p8<const LANE: i32>(a: p8, b: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe {
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x8_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
-    unsafe { _vrhaddq_s16(a, b) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52694,27 +54143,22 @@ pub fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srhadd.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")]
-        fn _vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vrhadd_s32(a, b) }
+pub fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52723,27 +54167,31 @@ pub fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srhadd.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")]
-        fn _vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+pub fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    unsafe {
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
-    unsafe { _vrhaddq_s32(a, b) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52752,27 +54200,22 @@ pub fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urhadd.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")]
-        fn _vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vrhadd_u8(a, b) }
+pub fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u8)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52781,27 +54224,26 @@ pub fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urhadd.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")]
-        fn _vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+pub fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe {
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let ret_val: poly16x4_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vrhaddq_u8(a, b) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52810,27 +54252,22 @@ pub fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urhadd.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")]
-        fn _vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vrhadd_u16(a, b) }
+pub fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52839,27 +54276,25 @@ pub fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urhadd.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")]
-        fn _vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+pub fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe {
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
-    unsafe { _vrhaddq_u16(a, b) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52868,27 +54303,21 @@ pub fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urhadd.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")]
-        fn _vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vrhadd_u32(a, b) }
+pub fn vset_lane_p64<const LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_t {
+    static_assert!(LANE == 0);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Rounding halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urhadd)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52897,85 +54326,69 @@ pub fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urhadd.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")]
-        fn _vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vrhaddq_u32(a, b) }
+pub fn vset_lane_s64<const LANE: i32>(a: i64, b: int64x1_t) -> int64x1_t {
+    static_assert!(LANE == 0);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Floating-point round to integral, to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u64)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frintn)
+    assert_instr(nop, LANE = 0)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrndn_f16(a: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
-            link_name = "llvm.roundeven.v4f16"
-        )]
-        fn _vrndn_f16(a: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vrndn_f16(a) }
+pub fn vset_lane_u64<const LANE: i32>(a: u64, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(LANE == 0);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Floating-point round to integral, to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frintn)
+    assert_instr(nop, LANE = 0)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
-            link_name = "llvm.roundeven.v8f16"
-        )]
-        fn _vrndnq_f16(a: float16x8_t) -> float16x8_t;
-    }
-    unsafe { _vrndnq_f16(a) }
+pub fn vsetq_lane_p64<const LANE: i32>(a: p64, b: poly64x2_t) -> poly64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_insert!(b, LANE as u32, a) }
 }
-#[doc = "Floating-point round to integral, to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frintn)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -52984,632 +54397,807 @@ pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrndn_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
-            link_name = "llvm.roundeven.v2f32"
-        )]
-        fn _vrndn_f32(a: float32x2_t) -> float32x2_t;
+pub fn vsetq_lane_p64<const LANE: i32>(a: p64, b: poly64x2_t) -> poly64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe {
+        let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+        let ret_val: poly64x2_t = simd_insert!(b, LANE as u32, a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
-    unsafe { _vrndn_f32(a) }
 }
-#[doc = "Floating-point round to integral, to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f32)"]
+#[doc = "SHA1 hash update accelerator, choose."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1c))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frintn)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
+pub fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm"),
-            link_name = "llvm.roundeven.v4f32"
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha1c"
         )]
-        fn _vrndnq_f32(a: float32x4_t) -> float32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")]
+        fn _vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
     }
-    unsafe { _vrndnq_f32(a) }
+    unsafe { _vsha1cq_u32(hash_abcd, hash_e, wk) }
 }
-#[doc = "Signed rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s8)"]
+#[doc = "SHA1 hash update accelerator, choose."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1c))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+pub fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srshl.v8i8"
+            link_name = "llvm.aarch64.crypto.sha1c"
         )]
-        fn _vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")]
+        fn _vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]);
+        let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsha1cq_u32(hash_abcd, hash_e, wk);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vrshl_s8(a, b) }
 }
-#[doc = "Signed rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s8)"]
+#[doc = "SHA1 fixed rotate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1h_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1h))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+pub fn vsha1h_u32(hash_e: u32) -> u32 {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srshl.v16i8"
+            link_name = "llvm.aarch64.crypto.sha1h"
         )]
-        fn _vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1h")]
+        fn _vsha1h_u32(hash_e: u32) -> u32;
     }
-    unsafe { _vrshlq_s8(a, b) }
+    unsafe { _vsha1h_u32(hash_e) }
 }
-#[doc = "Signed rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s16)"]
+#[doc = "SHA1 hash update accelerator, majority"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1m))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+pub fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srshl.v4i16"
+            link_name = "llvm.aarch64.crypto.sha1m"
         )]
-        fn _vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")]
+        fn _vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
     }
-    unsafe { _vrshl_s16(a, b) }
+    unsafe { _vsha1mq_u32(hash_abcd, hash_e, wk) }
 }
-#[doc = "Signed rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s16)"]
+#[doc = "SHA1 hash update accelerator, majority"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1m))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+pub fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srshl.v8i16"
+            link_name = "llvm.aarch64.crypto.sha1m"
         )]
-        fn _vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")]
+        fn _vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]);
+        let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsha1mq_u32(hash_abcd, hash_e, wk);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vrshlq_s16(a, b) }
 }
-#[doc = "Signed rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s32)"]
+#[doc = "SHA1 hash update accelerator, parity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1p))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srshl.v2i32"
+            link_name = "llvm.aarch64.crypto.sha1p"
         )]
-        fn _vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")]
+        fn _vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
     }
-    unsafe { _vrshl_s32(a, b) }
+    unsafe { _vsha1pq_u32(hash_abcd, hash_e, wk) }
 }
-#[doc = "Signed rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s32)"]
+#[doc = "SHA1 hash update accelerator, parity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1p))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srshl.v4i32"
+            link_name = "llvm.aarch64.crypto.sha1p"
         )]
-        fn _vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")]
+        fn _vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]);
+        let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsha1pq_u32(hash_abcd, hash_e, wk);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vrshlq_s32(a, b) }
 }
-#[doc = "Signed rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s64)"]
+#[doc = "SHA1 schedule update accelerator, first part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1su0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+pub fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srshl.v1i64"
+            link_name = "llvm.aarch64.crypto.sha1su0"
         )]
-        fn _vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")]
+        fn _vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t;
     }
-    unsafe { _vrshl_s64(a, b) }
+    unsafe { _vsha1su0q_u32(w0_3, w4_7, w8_11) }
 }
-#[doc = "Signed rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s64)"]
+#[doc = "SHA1 schedule update accelerator, first part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1su0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+pub fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srshl.v2i64"
+            link_name = "llvm.aarch64.crypto.sha1su0"
         )]
-        fn _vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")]
+        fn _vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let w0_3: uint32x4_t = simd_shuffle!(w0_3, w0_3, [3, 2, 1, 0]);
+        let w4_7: uint32x4_t = simd_shuffle!(w4_7, w4_7, [3, 2, 1, 0]);
+        let w8_11: uint32x4_t = simd_shuffle!(w8_11, w8_11, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsha1su0q_u32(w0_3, w4_7, w8_11);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vrshlq_s64(a, b) }
 }
-#[doc = "Unsigned rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u8)"]
+#[doc = "SHA1 schedule update accelerator, second part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1su1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+pub fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urshl.v8i8"
+            link_name = "llvm.aarch64.crypto.sha1su1"
         )]
-        fn _vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")]
+        fn _vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
     }
-    unsafe { _vrshl_u8(a, b) }
+    unsafe { _vsha1su1q_u32(tw0_3, w12_15) }
 }
-#[doc = "Unsigned rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u8)"]
+#[doc = "SHA1 schedule update accelerator, second part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1su1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+pub fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urshl.v16i8"
+            link_name = "llvm.aarch64.crypto.sha1su1"
         )]
-        fn _vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")]
+        fn _vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let tw0_3: uint32x4_t = simd_shuffle!(tw0_3, tw0_3, [3, 2, 1, 0]);
+        let w12_15: uint32x4_t = simd_shuffle!(w12_15, w12_15, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsha1su1q_u32(tw0_3, w12_15);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vrshlq_u8(a, b) }
 }
-#[doc = "Unsigned rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u16)"]
+#[doc = "SHA1 schedule update accelerator, upper part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256h2))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+pub fn vsha256h2q_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urshl.v4i16"
+            link_name = "llvm.aarch64.crypto.sha256h2"
         )]
-        fn _vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")]
+        fn _vsha256h2q_u32(
+            hash_abcd: uint32x4_t,
+            hash_efgh: uint32x4_t,
+            wk: uint32x4_t,
+        ) -> uint32x4_t;
     }
-    unsafe { _vrshl_u16(a, b) }
+    unsafe { _vsha256h2q_u32(hash_abcd, hash_efgh, wk) }
 }
-#[doc = "Unsigned rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u16)"]
+#[doc = "SHA1 schedule update accelerator, upper part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256h2))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+pub fn vsha256h2q_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urshl.v8i16"
+            link_name = "llvm.aarch64.crypto.sha256h2"
         )]
-        fn _vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")]
+        fn _vsha256h2q_u32(
+            hash_abcd: uint32x4_t,
+            hash_efgh: uint32x4_t,
+            wk: uint32x4_t,
+        ) -> uint32x4_t;
+    }
+    unsafe {
+        let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]);
+        let hash_efgh: uint32x4_t = simd_shuffle!(hash_efgh, hash_efgh, [3, 2, 1, 0]);
+        let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsha256h2q_u32(hash_abcd, hash_efgh, wk);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vrshlq_u16(a, b) }
 }
-#[doc = "Unsigned rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u32)"]
+#[doc = "SHA1 schedule update accelerator, first part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256h))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+pub fn vsha256hq_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urshl.v2i32"
+            link_name = "llvm.aarch64.crypto.sha256h"
         )]
-        fn _vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")]
+        fn _vsha256hq_u32(
+            hash_abcd: uint32x4_t,
+            hash_efgh: uint32x4_t,
+            wk: uint32x4_t,
+        ) -> uint32x4_t;
     }
-    unsafe { _vrshl_u32(a, b) }
+    unsafe { _vsha256hq_u32(hash_abcd, hash_efgh, wk) }
 }
-#[doc = "Unsigned rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u32)"]
+#[doc = "SHA1 schedule update accelerator, first part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256h))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+pub fn vsha256hq_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urshl.v4i32"
+            link_name = "llvm.aarch64.crypto.sha256h"
         )]
-        fn _vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")]
+        fn _vsha256hq_u32(
+            hash_abcd: uint32x4_t,
+            hash_efgh: uint32x4_t,
+            wk: uint32x4_t,
+        ) -> uint32x4_t;
+    }
+    unsafe {
+        let hash_abcd: uint32x4_t = simd_shuffle!(hash_abcd, hash_abcd, [3, 2, 1, 0]);
+        let hash_efgh: uint32x4_t = simd_shuffle!(hash_efgh, hash_efgh, [3, 2, 1, 0]);
+        let wk: uint32x4_t = simd_shuffle!(wk, wk, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsha256hq_u32(hash_abcd, hash_efgh, wk);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vrshlq_u32(a, b) }
 }
-#[doc = "Unsigned rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u64)"]
+#[doc = "SHA256 schedule update accelerator, first part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256su0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+pub fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urshl.v1i64"
+            link_name = "llvm.aarch64.crypto.sha256su0"
         )]
-        fn _vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")]
+        fn _vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t;
     }
-    unsafe { _vrshl_u64(a, b) }
+    unsafe { _vsha256su0q_u32(w0_3, w4_7) }
 }
-#[doc = "Unsigned rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u64)"]
+#[doc = "SHA256 schedule update accelerator, first part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256su0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshl)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+pub fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urshl.v2i64"
+            link_name = "llvm.aarch64.crypto.sha256su0"
         )]
-        fn _vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")]
+        fn _vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t;
+    }
+    unsafe {
+        let w0_3: uint32x4_t = simd_shuffle!(w0_3, w0_3, [3, 2, 1, 0]);
+        let w4_7: uint32x4_t = simd_shuffle!(w4_7, w4_7, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsha256su0q_u32(w0_3, w4_7);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
-    unsafe { _vrshlq_u64(a, b) }
 }
-#[doc = "Signed rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s8)"]
+#[doc = "SHA256 schedule update accelerator, second part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshr, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256su1))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    vrshl_s8(a, vdup_n_s8(-N as _))
-}
-#[doc = "Signed rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshr, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    vrshlq_s8(a, vdupq_n_s8(-N as _))
+pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha256su1"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")]
+        fn _vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t)
+            -> uint32x4_t;
+    }
+    unsafe { _vsha256su1q_u32(tw0_3, w8_11, w12_15) }
 }
-#[doc = "Signed rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s16)"]
+#[doc = "SHA256 schedule update accelerator, second part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256su1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshr, N = 2)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
 )]
-pub fn vrshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    vrshl_s16(a, vdup_n_s16(-N as _))
+pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha256su1"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")]
+        fn _vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t)
+            -> uint32x4_t;
+    }
+    unsafe {
+        let tw0_3: uint32x4_t = simd_shuffle!(tw0_3, tw0_3, [3, 2, 1, 0]);
+        let w8_11: uint32x4_t = simd_shuffle!(w8_11, w8_11, [3, 2, 1, 0]);
+        let w12_15: uint32x4_t = simd_shuffle!(w12_15, w12_15, [3, 2, 1, 0]);
+        let ret_val: uint32x4_t = _vsha256su1q_u32(tw0_3, w8_11, w12_15);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
-#[doc = "Signed rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshr, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    vrshlq_s16(a, vdupq_n_s16(-N as _))
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v16i8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")]
+        fn _vshiftlins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+    }
+    unsafe { _vshiftlins_v16i8(a, b, const { int8x16_t([N as i8; 16]) }) }
 }
-#[doc = "Signed rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshr, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    vrshl_s32(a, vdup_n_s32(-N as _))
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v1i64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")]
+        fn _vshiftlins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t;
+    }
+    unsafe { _vshiftlins_v1i64(a, b, const { int64x1_t([N as i64; 1]) }) }
 }
-#[doc = "Signed rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshr, N = 2)
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v2i32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")]
+        fn _vshiftlins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vshiftlins_v2i32(a, b, const { int32x2_t([N; 2]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v2i64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")]
+        fn _vshiftlins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    unsafe { _vshiftlins_v2i64(a, b, const { int64x2_t([N as i64; 2]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v4i16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")]
+        fn _vshiftlins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vshiftlins_v4i16(a, b, const { int16x4_t([N as i16; 4]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v4i32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")]
+        fn _vshiftlins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    unsafe { _vshiftlins_v4i32(a, b, const { int32x4_t([N; 4]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v8i16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")]
+        fn _vshiftlins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    unsafe { _vshiftlins_v8i16(a, b, const { int16x8_t([N as i16; 8]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v8i8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")]
+        fn _vshiftlins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vshiftlins_v8i8(a, b, const { int8x8_t([N as i8; 8]) }) }
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v16i8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v16i8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")]
+        fn _vshiftrins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+    }
+    unsafe { _vshiftrins_v16i8(a, b, const { int8x16_t([-N as i8; 16]) }) }
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v1i64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v1i64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")]
+        fn _vshiftrins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t;
+    }
+    unsafe { _vshiftrins_v1i64(a, b, const { int64x1_t([-N as i64; 1]) }) }
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v2i32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")]
+        fn _vshiftrins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vshiftrins_v2i32(a, b, const { int32x2_t([-N; 2]) }) }
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v2i64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")]
+        fn _vshiftrins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    unsafe { _vshiftrins_v2i64(a, b, const { int64x2_t([-N as i64; 2]) }) }
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v4i16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")]
+        fn _vshiftrins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vshiftrins_v4i16(a, b, const { int16x4_t([-N as i16; 4]) }) }
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v4i32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")]
+        fn _vshiftrins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    unsafe { _vshiftrins_v4i32(a, b, const { int32x4_t([-N; 4]) }) }
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v8i16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")]
+        fn _vshiftrins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    unsafe { _vshiftrins_v8i16(a, b, const { int16x8_t([-N as i16; 8]) }) }
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v8i8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")]
+        fn _vshiftrins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vshiftrins_v8i8(a, b, const { int8x8_t([-N as i8; 8]) }) }
+}
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53620,19 +55208,19 @@ pub fn vrshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    vrshlq_s32(a, vdupq_n_s32(-N as _))
+pub fn vshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shl(a, vdup_n_s8(N as _)) }
 }
-#[doc = "Signed rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s64)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshr, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53643,19 +55231,19 @@ pub fn vrshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    vrshl_s64(a, vdup_n_s64(-N as _))
+pub fn vshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shl(a, vdupq_n_s8(N as _)) }
 }
-#[doc = "Signed rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s64)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srshr, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53666,19 +55254,19 @@ pub fn vrshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    vrshlq_s64(a, vdupq_n_s64(-N as _))
+pub fn vshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { simd_shl(a, vdup_n_s16(N as _)) }
 }
-#[doc = "Unsigned rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u8)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshr, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53689,19 +55277,19 @@ pub fn vrshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    vrshl_u8(a, vdup_n_s8(-N as _))
+pub fn vshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { simd_shl(a, vdupq_n_s16(N as _)) }
 }
-#[doc = "Unsigned rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u8)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshr, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53712,19 +55300,19 @@ pub fn vrshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    vrshlq_u8(a, vdupq_n_s8(-N as _))
+pub fn vshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe { simd_shl(a, vdup_n_s32(N as _)) }
 }
-#[doc = "Unsigned rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u16)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshr, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53735,19 +55323,19 @@ pub fn vrshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    vrshl_u16(a, vdup_n_s16(-N as _))
+pub fn vshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe { simd_shl(a, vdupq_n_s32(N as _)) }
 }
-#[doc = "Unsigned rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u16)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshr, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53758,19 +55346,19 @@ pub fn vrshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    vrshlq_u16(a, vdupq_n_s16(-N as _))
+pub fn vshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe { simd_shl(a, vdup_n_s64(N as _)) }
 }
-#[doc = "Unsigned rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u32)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshr, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53781,19 +55369,19 @@ pub fn vrshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    vrshl_u32(a, vdup_n_s32(-N as _))
+pub fn vshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe { simd_shl(a, vdupq_n_s64(N as _)) }
 }
-#[doc = "Unsigned rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u32)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshr, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53804,19 +55392,19 @@ pub fn vrshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    vrshlq_u32(a, vdupq_n_s32(-N as _))
+pub fn vshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shl(a, vdup_n_u8(N as _)) }
 }
-#[doc = "Unsigned rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u64)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshr, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53827,19 +55415,19 @@ pub fn vrshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    vrshl_u64(a, vdup_n_s64(-N as _))
+pub fn vshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { simd_shl(a, vdupq_n_u8(N as _)) }
 }
-#[doc = "Unsigned rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u64)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urshr, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53850,124 +55438,19 @@ pub fn vrshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    vrshlq_u64(a, vdupq_n_s64(-N as _))
-}
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")]
-        fn _vrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-    unsafe { _vrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
-}
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")]
-        fn _vrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
-    }
-    unsafe { _vrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) }
-}
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")]
-        fn _vrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
-    }
-    unsafe { _vrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
-}
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.rshrn.v8i8"
-        )]
-        fn _vrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-    unsafe { _vrshrn_n_s16(a, N) }
-}
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.rshrn.v4i16"
-        )]
-        fn _vrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
-    }
-    unsafe { _vrshrn_n_s32(a, N) }
-}
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.rshrn.v2i32"
-        )]
-        fn _vrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
-    }
-    unsafe { _vrshrn_n_s64(a, N) }
+pub fn vshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { simd_shl(a, vdup_n_u16(N as _)) }
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u16)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rshrn, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -53978,19 +55461,19 @@ pub fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { transmute(vrshrn_n_s16::<N>(transmute(a))) }
+pub fn vshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { simd_shl(a, vdupq_n_u16(N as _)) }
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u32)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rshrn, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -54001,19 +55484,19 @@ pub fn vrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { transmute(vrshrn_n_s32::<N>(transmute(a))) }
+pub fn vshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe { simd_shl(a, vdup_n_u32(N as _)) }
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u64)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rshrn, N = 2)
+    assert_instr(shl, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -54024,79 +55507,65 @@ pub fn vrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { transmute(vrshrn_n_s64::<N>(transmute(a))) }
+pub fn vshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe { simd_shl(a, vdupq_n_u32(N as _)) }
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f16)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u64)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frsqrte)
+    assert_instr(shl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.v4f16"
-        )]
-        fn _vrsqrte_f16(a: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vrsqrte_f16(a) }
+pub fn vshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe { simd_shl(a, vdup_n_u64(N as _)) }
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f16)"]
+#[doc = "Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u64)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frsqrte)
+    assert_instr(shl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v8f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.v8f16"
-        )]
-        fn _vrsqrteq_f16(a: float16x8_t) -> float16x8_t;
-    }
-    unsafe { _vrsqrteq_f16(a) }
+pub fn vshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe { simd_shl(a, vdupq_n_u64(N as _)) }
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)"]
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frsqrte)
+    assert_instr(sshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -54106,26 +55575,26 @@ pub fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsqrte_f32(a: float32x2_t) -> float32x2_t {
+pub fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.v2f32"
+            link_name = "llvm.aarch64.neon.sshl.v8i8"
         )]
-        fn _vrsqrte_f32(a: float32x2_t) -> float32x2_t;
+        fn _vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    unsafe { _vrsqrte_f32(a) }
+    unsafe { _vshl_s8(a, b) }
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f32)"]
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frsqrte)
+    assert_instr(sshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -54135,26 +55604,26 @@ pub fn vrsqrte_f32(a: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t {
+pub fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.v4f32"
+            link_name = "llvm.aarch64.neon.sshl.v16i8"
         )]
-        fn _vrsqrteq_f32(a: float32x4_t) -> float32x4_t;
+        fn _vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-    unsafe { _vrsqrteq_f32(a) }
+    unsafe { _vshlq_s8(a, b) }
 }
-#[doc = "Unsigned reciprocal square root estimate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_u32)"]
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ursqrte)
+    assert_instr(sshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -54164,26 +55633,26 @@ pub fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t {
+pub fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ursqrte.v2i32"
+            link_name = "llvm.aarch64.neon.sshl.v4i16"
         )]
-        fn _vrsqrte_u32(a: uint32x2_t) -> uint32x2_t;
+        fn _vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    unsafe { _vrsqrte_u32(a) }
+    unsafe { _vshl_s16(a, b) }
 }
-#[doc = "Unsigned reciprocal square root estimate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_u32)"]
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ursqrte)
+    assert_instr(sshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -54193,86 +55662,84 @@ pub fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t {
+pub fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ursqrte.v4i32"
+            link_name = "llvm.aarch64.neon.sshl.v8i16"
         )]
-        fn _vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t;
+        fn _vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-    unsafe { _vrsqrteq_u32(a) }
+    unsafe { _vshlq_s16(a, b) }
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f16)"]
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s32)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frsqrts)
+    assert_instr(sshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+pub fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.v4f16"
+            link_name = "llvm.aarch64.neon.sshl.v2i32"
         )]
-        fn _vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+        fn _vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    unsafe { _vrsqrts_f16(a, b) }
+    unsafe { _vshl_s32(a, b) }
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f16)"]
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s32)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frsqrts)
+    assert_instr(sshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+pub fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v8f16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.v8f16"
+            link_name = "llvm.aarch64.neon.sshl.v4i32"
         )]
-        fn _vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+        fn _vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    unsafe { _vrsqrtsq_f16(a, b) }
+    unsafe { _vshlq_s32(a, b) }
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)"]
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frsqrts)
+    assert_instr(sshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -54282,26 +55749,26 @@ pub fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+pub fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.v2f32"
+            link_name = "llvm.aarch64.neon.sshl.v1i64"
         )]
-        fn _vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
     }
-    unsafe { _vrsqrts_f32(a, b) }
+    unsafe { _vshl_s64(a, b) }
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f32)"]
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frsqrts)
+    assert_instr(sshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -54311,28 +55778,27 @@ pub fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+pub fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.v4f32"
+            link_name = "llvm.aarch64.neon.sshl.v2i64"
         )]
-        fn _vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        fn _vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
     }
-    unsafe { _vrsqrtsq_f32(a, b) }
+    unsafe { _vshlq_s64(a, b) }
 }
-#[doc = "Signed rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s8)"]
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srsra, N = 2)
+    assert_instr(ushl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54341,21 +55807,27 @@ pub fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_add(a, vrshr_n_s8::<N>(b)) }
+pub fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ushl.v8i8"
+        )]
+        fn _vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+    }
+    unsafe { _vshl_u8(a, b) }
 }
-#[doc = "Signed rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s8)"]
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srsra, N = 2)
+    assert_instr(ushl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54364,21 +55836,27 @@ pub fn vrsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_add(a, vrshrq_n_s8::<N>(b)) }
+pub fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ushl.v16i8"
+        )]
+        fn _vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+    }
+    unsafe { _vshlq_u8(a, b) }
 }
-#[doc = "Signed rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s16)"]
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srsra, N = 2)
+    assert_instr(ushl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54387,21 +55865,27 @@ pub fn vrsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_add(a, vrshr_n_s16::<N>(b)) }
+pub fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ushl.v4i16"
+        )]
+        fn _vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+    }
+    unsafe { _vshl_u16(a, b) }
 }
-#[doc = "Signed rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s16)"]
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srsra, N = 2)
+    assert_instr(ushl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54410,21 +55894,27 @@ pub fn vrsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_add(a, vrshrq_n_s16::<N>(b)) }
+pub fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ushl.v8i16"
+        )]
+        fn _vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+    }
+    unsafe { _vshlq_u16(a, b) }
 }
-#[doc = "Signed rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s32)"]
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srsra, N = 2)
+    assert_instr(ushl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54433,21 +55923,27 @@ pub fn vrsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_add(a, vrshr_n_s32::<N>(b)) }
+pub fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ushl.v2i32"
+        )]
+        fn _vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vshl_u32(a, b) }
 }
-#[doc = "Signed rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s32)"]
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srsra, N = 2)
+    assert_instr(ushl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54456,21 +55952,27 @@ pub fn vrsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_add(a, vrshrq_n_s32::<N>(b)) }
+pub fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ushl.v4i32"
+        )]
+        fn _vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vshlq_u32(a, b) }
 }
-#[doc = "Signed rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s64)"]
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srsra, N = 2)
+    assert_instr(ushl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54479,21 +55981,27 @@ pub fn vrsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe { simd_add(a, vrshr_n_s64::<N>(b)) }
+pub fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ushl.v1i64"
+        )]
+        fn _vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+    }
+    unsafe { _vshl_u64(a, b) }
 }
-#[doc = "Signed rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s64)"]
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(srsra, N = 2)
+    assert_instr(ushl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54502,21 +56010,28 @@ pub fn vrsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe { simd_add(a, vrshrq_n_s64::<N>(b)) }
+pub fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ushl.v2i64"
+        )]
+        fn _vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+    }
+    unsafe { _vshlq_u64(a, b) }
 }
-#[doc = "Unsigned rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u8)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ursra, N = 2)
+    assert_instr(sshll, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54525,21 +56040,21 @@ pub fn vrsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_add(a, vrshr_n_u8::<N>(b)) }
+pub fn vshll_n_s16<const N: i32>(a: int16x4_t) -> int32x4_t {
+    static_assert!(N >= 0 && N <= 16);
+    unsafe { simd_shl(simd_cast(a), vdupq_n_s32(N as _)) }
 }
-#[doc = "Unsigned rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u8)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ursra, N = 2)
+    assert_instr(sshll, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54548,21 +56063,21 @@ pub fn vrsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_add(a, vrshrq_n_u8::<N>(b)) }
+pub fn vshll_n_s32<const N: i32>(a: int32x2_t) -> int64x2_t {
+    static_assert!(N >= 0 && N <= 32);
+    unsafe { simd_shl(simd_cast(a), vdupq_n_s64(N as _)) }
 }
-#[doc = "Unsigned rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u16)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ursra, N = 2)
+    assert_instr(sshll, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54571,21 +56086,21 @@ pub fn vrsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_add(a, vrshr_n_u16::<N>(b)) }
+pub fn vshll_n_s8<const N: i32>(a: int8x8_t) -> int16x8_t {
+    static_assert!(N >= 0 && N <= 8);
+    unsafe { simd_shl(simd_cast(a), vdupq_n_s16(N as _)) }
 }
-#[doc = "Unsigned rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u16)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ursra, N = 2)
+    assert_instr(ushll, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54594,21 +56109,21 @@ pub fn vrsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_add(a, vrshrq_n_u16::<N>(b)) }
+pub fn vshll_n_u16<const N: i32>(a: uint16x4_t) -> uint32x4_t {
+    static_assert!(N >= 0 && N <= 16);
+    unsafe { simd_shl(simd_cast(a), vdupq_n_u32(N as _)) }
 }
-#[doc = "Unsigned rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u32)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ursra, N = 2)
+    assert_instr(ushll, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54617,21 +56132,21 @@ pub fn vrsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_add(a, vrshr_n_u32::<N>(b)) }
+pub fn vshll_n_u32<const N: i32>(a: uint32x2_t) -> uint64x2_t {
+    static_assert!(N >= 0 && N <= 32);
+    unsafe { simd_shl(simd_cast(a), vdupq_n_u64(N as _)) }
 }
-#[doc = "Unsigned rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u32)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ursra, N = 2)
+    assert_instr(ushll, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54640,21 +56155,21 @@ pub fn vrsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_add(a, vrshrq_n_u32::<N>(b)) }
+pub fn vshll_n_u8<const N: i32>(a: uint8x8_t) -> uint16x8_t {
+    static_assert!(N >= 0 && N <= 8);
+    unsafe { simd_shl(simd_cast(a), vdupq_n_u16(N as _)) }
 }
-#[doc = "Unsigned rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u64)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ursra, N = 2)
+    assert_instr(sshr, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54663,21 +56178,22 @@ pub fn vrsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe { simd_add(a, vrshr_n_u64::<N>(b)) }
+pub fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    let n: i32 = if N == 8 { 7 } else { N };
+    unsafe { simd_shr(a, vdup_n_s8(n as _)) }
 }
-#[doc = "Unsigned rounding shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u64)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ursra, N = 2)
+    assert_instr(sshr, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54686,20 +56202,22 @@ pub fn vrsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe { simd_add(a, vrshrq_n_u64::<N>(b)) }
+pub fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    let n: i32 = if N == 8 { 7 } else { N };
+    unsafe { simd_shr(a, vdupq_n_s8(n as _)) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s16)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
+    assert_instr(sshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54708,27 +56226,22 @@ pub fn vrsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.rsubhn.v8i8"
-        )]
-        fn _vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t;
-    }
-    unsafe { _vrsubhn_s16(a, b) }
+pub fn vshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    let n: i32 = if N == 16 { 15 } else { N };
+    unsafe { simd_shr(a, vdup_n_s16(n as _)) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s32)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
+    assert_instr(sshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54737,27 +56250,22 @@ pub fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.rsubhn.v4i16"
-        )]
-        fn _vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t;
-    }
-    unsafe { _vrsubhn_s32(a, b) }
+pub fn vshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    let n: i32 = if N == 16 { 15 } else { N };
+    unsafe { simd_shr(a, vdupq_n_s16(n as _)) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s64)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
+    assert_instr(sshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54766,27 +56274,22 @@ pub fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.rsubhn.v2i32"
-        )]
-        fn _vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t;
-    }
-    unsafe { _vrsubhn_s64(a, b) }
+pub fn vshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    let n: i32 = if N == 32 { 31 } else { N };
+    unsafe { simd_shr(a, vdup_n_s32(n as _)) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
+    assert_instr(sshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54795,19 +56298,22 @@ pub fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
-    unsafe { transmute(vrsubhn_s16(transmute(a), transmute(b))) }
+pub fn vshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    let n: i32 = if N == 32 { 31 } else { N };
+    unsafe { simd_shr(a, vdupq_n_s32(n as _)) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
+    assert_instr(sshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54816,19 +56322,22 @@ pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
-    unsafe { transmute(vrsubhn_s32(transmute(a), transmute(b))) }
+pub fn vshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    let n: i32 = if N == 64 { 63 } else { N };
+    unsafe { simd_shr(a, vdup_n_s64(n as _)) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
+    assert_instr(sshr, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54837,54 +56346,22 @@ pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
-    unsafe { transmute(vrsubhn_s64(transmute(a), transmute(b))) }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vset_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> float16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_insert!(b, LANE as u32, a) }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vsetq_lane_f16<const LANE: i32>(a: f16, b: float16x8_t) -> float16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    let n: i32 = if N == 64 { 63 } else { N };
+    unsafe { simd_shr(a, vdupq_n_s64(n as _)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ushr, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54893,21 +56370,26 @@ pub fn vsetq_lane_f16<const LANE: i32>(a: f16, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    let n: i32 = if N == 8 {
+        return vdup_n_u8(0);
+    } else {
+        N
+    };
+    unsafe { simd_shr(a, vdup_n_u8(n as _)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ushr, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54916,21 +56398,26 @@ pub fn vset_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsetq_lane_f32<const LANE: i32>(a: f32, b: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    let n: i32 = if N == 8 {
+        return vdupq_n_u8(0);
+    } else {
+        N
+    };
+    unsafe { simd_shr(a, vdupq_n_u8(n as _)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ushr, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54939,21 +56426,26 @@ pub fn vsetq_lane_f32<const LANE: i32>(a: f32, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    let n: i32 = if N == 16 {
+        return vdup_n_u16(0);
+    } else {
+        N
+    };
+    unsafe { simd_shr(a, vdup_n_u16(n as _)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ushr, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54962,21 +56454,26 @@ pub fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(LANE, 4);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    let n: i32 = if N == 16 {
+        return vdupq_n_u16(0);
+    } else {
+        N
+    };
+    unsafe { simd_shr(a, vdupq_n_u16(n as _)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ushr, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -54985,21 +56482,26 @@ pub fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    let n: i32 = if N == 32 {
+        return vdup_n_u32(0);
+    } else {
+        N
+    };
+    unsafe { simd_shr(a, vdup_n_u32(n as _)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ushr, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -55008,21 +56510,26 @@ pub fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    let n: i32 = if N == 32 {
+        return vdupq_n_u32(0);
+    } else {
+        N
+    };
+    unsafe { simd_shr(a, vdupq_n_u32(n as _)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ushr, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -55031,21 +56538,26 @@ pub fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    let n: i32 = if N == 64 {
+        return vdup_n_u64(0);
+    } else {
+        N
+    };
+    unsafe { simd_shr(a, vdup_n_u64(n as _)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"]
+#[doc = "Shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ushr, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -55054,21 +56566,26 @@ pub fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    let n: i32 = if N == 64 {
+        return vdupq_n_u64(0);
+    } else {
+        N
+    };
+    unsafe { simd_shr(a, vdupq_n_u64(n as _)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(shrn, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -55077,21 +56594,21 @@ pub fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { simd_cast(simd_shr(a, vdupq_n_s16(N as _))) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(shrn, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -55100,21 +56617,21 @@ pub fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_u8<const LANE: i32>(a: u8, b: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { simd_cast(simd_shr(a, vdupq_n_s32(N as _))) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(shrn, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -55123,21 +56640,21 @@ pub fn vset_lane_u8<const LANE: i32>(a: u8, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(LANE, 4);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { simd_cast(simd_shr(a, vdupq_n_s64(N as _))) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(shrn, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -55146,21 +56663,21 @@ pub fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { simd_cast(simd_shr(a, vdupq_n_u16(N as _))) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(shrn, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -55169,21 +56686,21 @@ pub fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsetq_lane_u16<const LANE: i32>(a: u16, b: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { simd_cast(simd_shr(a, vdupq_n_u32(N as _))) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(shrn, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -55192,111 +56709,282 @@ pub fn vsetq_lane_u16<const LANE: i32>(a: u16, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { simd_cast(simd_shr(a, vdupq_n_u64(N as _))) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    vshiftlins_v8i8::<N>(a, b)
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    vshiftlins_v16i8::<N>(a, b)
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vset_lane_p8<const LANE: i32>(a: p8, b: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    vshiftlins_v4i16::<N>(a, b)
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+pub fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    vshiftlins_v8i16::<N>(a, b)
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N >= 0 && N <= 31);
+    vshiftlins_v2i32::<N>(a, b)
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N >= 0 && N <= 31);
+    vshiftlins_v4i32::<N>(a, b)
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(N >= 0 && N <= 63);
+    vshiftlins_v1i64::<N>(a, b)
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(N >= 0 && N <= 63);
+    vshiftlins_v2i64::<N>(a, b)
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { transmute(vshiftlins_v8i8::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { transmute(vshiftlins_v16i8::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { transmute(vshiftlins_v4i16::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { transmute(vshiftlins_v8i16::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(N >= 0 && N <= 31);
+    unsafe { transmute(vshiftlins_v2i32::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N >= 0 && N <= 31);
+    unsafe { transmute(vshiftlins_v4i32::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 0 && N <= 63);
+    unsafe { transmute(vshiftlins_v1i64::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 0 && N <= 63);
+    unsafe { transmute(vshiftlins_v2i64::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { transmute(vshiftlins_v8i8::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe { transmute(vshiftlins_v16i8::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { transmute(vshiftlins_v4i16::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe { transmute(vshiftlins_v8i16::<N>(transmute(a), transmute(b))) }
+}
+#[doc = "Signed shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ssra, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(LANE, 4);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { simd_add(a, vshr_n_s8::<N>(b)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"]
+#[doc = "Signed shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ssra, N = 2)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -55307,19 +56995,19 @@ pub fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { simd_add(a, vshrq_n_s8::<N>(b)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"]
+#[doc = "Signed shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ssra, N = 2)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -55330,19 +57018,19 @@ pub fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { simd_add(a, vshr_n_s16::<N>(b)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p64)"]
+#[doc = "Signed shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ssra, N = 2)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -55353,19 +57041,19 @@ pub fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_p64<const LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_t {
-    static_assert!(LANE == 0);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { simd_add(a, vshrq_n_s16::<N>(b)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s64)"]
+#[doc = "Signed shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ssra, N = 2)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -55376,19 +57064,19 @@ pub fn vset_lane_p64<const LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_s64<const LANE: i32>(a: i64, b: int64x1_t) -> int64x1_t {
-    static_assert!(LANE == 0);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { simd_add(a, vshr_n_s32::<N>(b)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u64)"]
+#[doc = "Signed shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ssra, N = 2)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -55399,19 +57087,19 @@ pub fn vset_lane_s64<const LANE: i32>(a: i64, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vset_lane_u64<const LANE: i32>(a: u64, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(LANE == 0);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { simd_add(a, vshrq_n_s32::<N>(b)) }
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"]
+#[doc = "Signed shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(ssra, N = 2)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -55422,1451 +57110,1338 @@ pub fn vset_lane_u64<const LANE: i32>(a: u64, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsetq_lane_p64<const LANE: i32>(a: p64, b: poly64x2_t) -> poly64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_insert!(b, LANE as u32, a) }
+pub fn vsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe { simd_add(a, vshr_n_s64::<N>(b)) }
 }
-#[doc = "SHA1 hash update accelerator, choose."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"]
+#[doc = "Signed shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s64)"]
 #[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1c))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ssra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-pub fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha1c"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")]
-        fn _vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vsha1cq_u32(hash_abcd, hash_e, wk) }
-}
-#[doc = "SHA1 fixed rotate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1h_u32)"]
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1h))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub fn vsha1h_u32(hash_e: u32) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha1h"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1h")]
-        fn _vsha1h_u32(hash_e: u32) -> u32;
-    }
-    unsafe { _vsha1h_u32(hash_e) }
+pub fn vsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe { simd_add(a, vshrq_n_s64::<N>(b)) }
 }
-#[doc = "SHA1 hash update accelerator, majority"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"]
+#[doc = "Unsigned shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u8)"]
 #[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1m))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usra, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-pub fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha1m"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")]
-        fn _vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vsha1mq_u32(hash_abcd, hash_e, wk) }
-}
-#[doc = "SHA1 hash update accelerator, parity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"]
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1p))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
+pub fn vsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { simd_add(a, vshr_n_u8::<N>(b)) }
+}
+#[doc = "Unsigned shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usra, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-pub fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha1p"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")]
-        fn _vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vsha1pq_u32(hash_abcd, hash_e, wk) }
-}
-#[doc = "SHA1 schedule update accelerator, first part."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"]
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1su0))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
+pub fn vsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe { simd_add(a, vshrq_n_u8::<N>(b)) }
+}
+#[doc = "Unsigned shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usra, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-pub fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha1su0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")]
-        fn _vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vsha1su0q_u32(w0_3, w4_7, w8_11) }
-}
-#[doc = "SHA1 schedule update accelerator, second part."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"]
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1su1))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
+pub fn vsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { simd_add(a, vshr_n_u16::<N>(b)) }
+}
+#[doc = "Unsigned shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usra, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-pub fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha1su1"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")]
-        fn _vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vsha1su1q_u32(tw0_3, w12_15) }
-}
-#[doc = "SHA1 schedule update accelerator, upper part."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"]
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha256h2))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
+pub fn vsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe { simd_add(a, vshrq_n_u16::<N>(b)) }
+}
+#[doc = "Unsigned shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usra, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-pub fn vsha256h2q_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha256h2"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")]
-        fn _vsha256h2q_u32(
-            hash_abcd: uint32x4_t,
-            hash_efgh: uint32x4_t,
-            wk: uint32x4_t,
-        ) -> uint32x4_t;
-    }
-    unsafe { _vsha256h2q_u32(hash_abcd, hash_efgh, wk) }
-}
-#[doc = "SHA1 schedule update accelerator, first part."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"]
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha256h))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
+pub fn vsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { simd_add(a, vshr_n_u32::<N>(b)) }
+}
+#[doc = "Unsigned shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usra, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-pub fn vsha256hq_u32(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha256h"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")]
-        fn _vsha256hq_u32(
-            hash_abcd: uint32x4_t,
-            hash_efgh: uint32x4_t,
-            wk: uint32x4_t,
-        ) -> uint32x4_t;
-    }
-    unsafe { _vsha256hq_u32(hash_abcd, hash_efgh, wk) }
-}
-#[doc = "SHA256 schedule update accelerator, first part."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"]
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha256su0))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
+pub fn vsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe { simd_add(a, vshrq_n_u32::<N>(b)) }
+}
+#[doc = "Unsigned shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usra, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-pub fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha256su0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")]
-        fn _vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vsha256su0q_u32(w0_3, w4_7) }
-}
-#[doc = "SHA256 schedule update accelerator, second part."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"]
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha256su1))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
+pub fn vsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe { simd_add(a, vshr_n_u64::<N>(b)) }
+}
+#[doc = "Unsigned shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usra, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha256su1"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")]
-        fn _vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t)
-            -> uint32x4_t;
-    }
-    unsafe { _vsha256su1q_u32(tw0_3, w8_11, w12_15) }
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe { simd_add(a, vshrq_n_u64::<N>(b)) }
 }
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftlins_v16i8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")]
-        fn _vshiftlins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
-    }
-    unsafe { _vshiftlins_v16i8(a, b, const { int8x16_t([N as i8; 16]) }) }
+pub fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert!(1 <= N && N <= 8);
+    vshiftrins_v8i8::<N>(a, b)
 }
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftlins_v1i64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")]
-        fn _vshiftlins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t;
-    }
-    unsafe { _vshiftlins_v1i64(a, b, const { int64x1_t([N as i64; 1]) }) }
+pub fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert!(1 <= N && N <= 8);
+    vshiftrins_v16i8::<N>(a, b)
 }
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftlins_v2i32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")]
-        fn _vshiftlins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vshiftlins_v2i32(a, b, const { int32x2_t([N; 2]) }) }
+pub fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert!(1 <= N && N <= 16);
+    vshiftrins_v4i16::<N>(a, b)
 }
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftlins_v2i64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")]
-        fn _vshiftlins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
-    }
-    unsafe { _vshiftlins_v2i64(a, b, const { int64x2_t([N as i64; 2]) }) }
+pub fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert!(1 <= N && N <= 16);
+    vshiftrins_v8i16::<N>(a, b)
 }
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftlins_v4i16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")]
-        fn _vshiftlins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vshiftlins_v4i16(a, b, const { int16x4_t([N as i16; 4]) }) }
+pub fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(1 <= N && N <= 32);
+    vshiftrins_v2i32::<N>(a, b)
 }
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftlins_v4i32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")]
-        fn _vshiftlins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vshiftlins_v4i32(a, b, const { int32x4_t([N; 4]) }) }
+pub fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(1 <= N && N <= 32);
+    vshiftrins_v4i32::<N>(a, b)
 }
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftlins_v8i16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")]
-        fn _vshiftlins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vshiftlins_v8i16(a, b, const { int16x8_t([N as i16; 8]) }) }
+pub fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(1 <= N && N <= 64);
+    vshiftrins_v1i64::<N>(a, b)
 }
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftlins_v8i8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")]
-        fn _vshiftlins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vshiftlins_v8i8(a, b, const { int8x8_t([N as i8; 8]) }) }
+pub fn vsriq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(1 <= N && N <= 64);
+    vshiftrins_v2i64::<N>(a, b)
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v16i8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftrins_v16i8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")]
-        fn _vshiftrins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
-    }
-    unsafe { _vshiftrins_v16i8(a, b, const { int8x16_t([-N as i8; 16]) }) }
+pub fn vsri_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert!(1 <= N && N <= 8);
+    unsafe { transmute(vshiftrins_v8i8::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v1i64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftrins_v1i64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")]
-        fn _vshiftrins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t;
-    }
-    unsafe { _vshiftrins_v1i64(a, b, const { int64x1_t([-N as i64; 1]) }) }
+pub fn vsriq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(1 <= N && N <= 8);
+    unsafe { transmute(vshiftrins_v16i8::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftrins_v2i32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")]
-        fn _vshiftrins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vshiftrins_v2i32(a, b, const { int32x2_t([-N; 2]) }) }
+pub fn vsri_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert!(1 <= N && N <= 16);
+    unsafe { transmute(vshiftrins_v4i16::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftrins_v2i64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")]
-        fn _vshiftrins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
-    }
-    unsafe { _vshiftrins_v2i64(a, b, const { int64x2_t([-N as i64; 2]) }) }
+pub fn vsriq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert!(1 <= N && N <= 16);
+    unsafe { transmute(vshiftrins_v8i16::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftrins_v4i16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")]
-        fn _vshiftrins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vshiftrins_v4i16(a, b, const { int16x4_t([-N as i16; 4]) }) }
+pub fn vsri_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(1 <= N && N <= 32);
+    unsafe { transmute(vshiftrins_v2i32::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftrins_v4i32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")]
-        fn _vshiftrins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vshiftrins_v4i32(a, b, const { int32x4_t([-N; 4]) }) }
+pub fn vsriq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(1 <= N && N <= 32);
+    unsafe { transmute(vshiftrins_v4i32::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftrins_v8i16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")]
-        fn _vshiftrins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vshiftrins_v8i16(a, b, const { int16x8_t([-N as i16; 8]) }) }
+pub fn vsri_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(1 <= N && N <= 64);
+    unsafe { transmute(vshiftrins_v1i64::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
 #[rustc_legacy_const_generics(2)]
-fn vshiftrins_v8i8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")]
-        fn _vshiftrins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vshiftrins_v8i8(a, b, const { int8x8_t([-N as i8; 8]) }) }
+pub fn vsriq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(1 <= N && N <= 64);
+    unsafe { transmute(vshiftrins_v2i64::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shl(a, vdup_n_s8(N as _)) }
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsri_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert!(1 <= N && N <= 8);
+    unsafe { transmute(vshiftrins_v8i8::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s8)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shl(a, vdupq_n_s8(N as _)) }
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsriq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert!(1 <= N && N <= 8);
+    unsafe { transmute(vshiftrins_v16i8::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s16)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { simd_shl(a, vdup_n_s16(N as _)) }
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsri_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert!(1 <= N && N <= 16);
+    unsafe { transmute(vshiftrins_v4i16::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s16)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { simd_shl(a, vdupq_n_s16(N as _)) }
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert!(1 <= N && N <= 16);
+    unsafe { transmute(vshiftrins_v8i16::<N>(transmute(a), transmute(b))) }
 }
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe { simd_shl(a, vdup_n_s32(N as _)) }
-}
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe { simd_shl(a, vdupq_n_s32(N as _)) }
-}
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe { simd_shl(a, vdup_n_s64(N as _)) }
-}
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe { simd_shl(a, vdupq_n_s64(N as _)) }
-}
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shl(a, vdup_n_u8(N as _)) }
-}
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shl(a, vdupq_n_u8(N as _)) }
-}
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { simd_shl(a, vdup_n_u16(N as _)) }
-}
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { simd_shl(a, vdupq_n_u16(N as _)) }
-}
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe { simd_shl(a, vdup_n_u32(N as _)) }
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) {
+    vst1_v4f16(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<f16>() as i32,
+    )
 }
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe { simd_shl(a, vdupq_n_u32(N as _)) }
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) {
+    vst1q_v8f16(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<f16>() as i32,
+    )
 }
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe { simd_shl(a, vdup_n_u64(N as _)) }
+#[cfg_attr(test, assert_instr(vst1))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v4f16")]
+        fn _vst1_f16_x2(ptr: *mut f16, a: float16x4_t, b: float16x4_t);
+    }
+    _vst1_f16_x2(a, b.0, b.1)
 }
-#[doc = "Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe { simd_shl(a, vdupq_n_u64(N as _)) }
+#[cfg_attr(test, assert_instr(vst1))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v8f16")]
+        fn _vst1q_f16_x2(ptr: *mut f16, a: float16x8_t, b: float16x8_t);
+    }
+    _vst1q_f16_x2(a, b.0, b.1)
 }
-#[doc = "Signed Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sshl.v8i8"
+            link_name = "llvm.aarch64.neon.st1x2.v4f16.p0"
         )]
-        fn _vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        fn _vst1_f16_x2(a: float16x4_t, b: float16x4_t, ptr: *mut f16);
     }
-    unsafe { _vshl_s8(a, b) }
+    _vst1_f16_x2(b.0, b.1, a)
 }
-#[doc = "Signed Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sshl.v16i8"
+            link_name = "llvm.aarch64.neon.st1x2.v8f16.p0"
         )]
-        fn _vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        fn _vst1q_f16_x2(a: float16x8_t, b: float16x8_t, ptr: *mut f16);
     }
-    unsafe { _vshlq_s8(a, b) }
+    _vst1q_f16_x2(b.0, b.1, a)
 }
-#[doc = "Signed Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+#[cfg_attr(test, assert_instr(vst1))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sshl.v4i16"
-        )]
-        fn _vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4f16")]
+        fn _vst1_f16_x3(ptr: *mut f16, a: float16x4_t, b: float16x4_t, c: float16x4_t);
     }
-    unsafe { _vshl_s16(a, b) }
+    _vst1_f16_x3(a, b.0, b.1, b.2)
 }
-#[doc = "Signed Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+#[cfg_attr(test, assert_instr(vst1))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8f16")]
+        fn _vst1q_f16_x3(ptr: *mut f16, a: float16x8_t, b: float16x8_t, c: float16x8_t);
+    }
+    _vst1q_f16_x3(a, b.0, b.1, b.2)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sshl.v8i16"
+            link_name = "llvm.aarch64.neon.st1x3.v4f16.p0"
         )]
-        fn _vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        fn _vst1_f16_x3(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut f16);
     }
-    unsafe { _vshlq_s16(a, b) }
+    _vst1_f16_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Signed Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sshl.v2i32"
+            link_name = "llvm.aarch64.neon.st1x3.v8f16.p0"
         )]
-        fn _vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        fn _vst1q_f16_x3(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut f16);
     }
-    unsafe { _vshl_s32(a, b) }
+    _vst1q_f16_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Signed Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sshl.v4i32"
-        )]
-        fn _vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f16")]
+        fn _vst1_f16_x4(
+            ptr: *mut f16,
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            d: float16x4_t,
+        );
     }
-    unsafe { _vshlq_s32(a, b) }
+    _vst1_f16_x4(a, b.0, b.1, b.2, b.3)
 }
-#[doc = "Signed Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sshl.v1i64"
-        )]
-        fn _vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8f16")]
+        fn _vst1q_f16_x4(
+            ptr: *mut f16,
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            d: float16x8_t,
+        );
     }
-    unsafe { _vshl_s64(a, b) }
+    _vst1q_f16_x4(a, b.0, b.1, b.2, b.3)
 }
-#[doc = "Signed Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sshl.v2i64"
+            link_name = "llvm.aarch64.neon.st1x4.v4f16.p0"
         )]
-        fn _vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+        fn _vst1_f16_x4(
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            d: float16x4_t,
+            ptr: *mut f16,
+        );
     }
-    unsafe { _vshlq_s64(a, b) }
+    _vst1_f16_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Unsigned Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ushl.v8i8"
+            link_name = "llvm.aarch64.neon.st1x4.v8f16.p0"
         )]
-        fn _vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+        fn _vst1q_f16_x4(
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            d: float16x8_t,
+            ptr: *mut f16,
+        );
     }
-    unsafe { _vshl_u8(a, b) }
+    _vst1q_f16_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Unsigned Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ushl.v16i8"
-        )]
-        fn _vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vshlq_u8(a, b) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
+    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
+    vst1_v2f32::<ALIGN>(ptr as *const i8, transmute(a))
 }
-#[doc = "Unsigned Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ushl.v4i16"
-        )]
-        fn _vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vshl_u16(a, b) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
+    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
+    vst1q_v4f32::<ALIGN>(ptr as *const i8, transmute(a))
 }
-#[doc = "Unsigned Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ushl.v8i16"
-        )]
-        fn _vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
-    }
-    unsafe { _vshlq_u16(a, b) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
+    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
+    vst1_v8i8::<ALIGN>(ptr as *const i8, a)
 }
-#[doc = "Unsigned Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ushl.v2i32"
-        )]
-        fn _vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vshl_u32(a, b) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
+    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
+    vst1q_v16i8::<ALIGN>(ptr as *const i8, a)
 }
-#[doc = "Unsigned Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ushl.v4i32"
-        )]
-        fn _vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vshlq_u32(a, b) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
+    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
+    vst1_v4i16::<ALIGN>(ptr as *const i8, a)
 }
-#[doc = "Unsigned Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ushl.v1i64"
-        )]
-        fn _vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
-    }
-    unsafe { _vshl_u64(a, b) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
+    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
+    vst1q_v8i16::<ALIGN>(ptr as *const i8, a)
 }
-#[doc = "Unsigned Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ushl.v2i64"
-        )]
-        fn _vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
-    }
-    unsafe { _vshlq_u64(a, b) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
+    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
+    vst1_v2i32::<ALIGN>(ptr as *const i8, a)
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshll, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshll_n_s16<const N: i32>(a: int16x4_t) -> int32x4_t {
-    static_assert!(N >= 0 && N <= 16);
-    unsafe { simd_shl(simd_cast(a), vdupq_n_s32(N as _)) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
+    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
+    vst1q_v4i32::<ALIGN>(ptr as *const i8, a)
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshll, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshll_n_s32<const N: i32>(a: int32x2_t) -> int64x2_t {
-    static_assert!(N >= 0 && N <= 32);
-    unsafe { simd_shl(simd_cast(a), vdupq_n_s64(N as _)) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
+    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
+    vst1_v1i64::<ALIGN>(ptr as *const i8, a)
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshll, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshll_n_s8<const N: i32>(a: int8x8_t) -> int16x8_t {
-    static_assert!(N >= 0 && N <= 8);
-    unsafe { simd_shl(simd_cast(a), vdupq_n_s16(N as _)) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
+    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
+    vst1q_v2i64::<ALIGN>(ptr as *const i8, a)
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushll, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vshll_n_u16<const N: i32>(a: uint16x4_t) -> uint32x4_t {
-    static_assert!(N >= 0 && N <= 16);
-    unsafe { simd_shl(simd_cast(a), vdupq_n_u32(N as _)) }
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
+    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
+    vst1_v8i8::<ALIGN>(ptr as *const i8, transmute(a))
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
+    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
+    vst1q_v16i8::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
+    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
+    vst1_v4i16::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
+    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
+    vst1q_v8i16::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
+    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
+    vst1_v2i32::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
+    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
+    vst1q_v4i32::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
+    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
+    vst1_v1i64::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
+    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
+    vst1q_v2i64::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
+    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
+    vst1_v8i8::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
+    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
+    vst1q_v16i8::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
+    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
+    vst1_v4i16::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
+    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
+    vst1q_v8i16::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
+    const ALIGN: i32 = crate::mem::align_of::<p64>() as i32;
+    vst1_v1i64::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
+    const ALIGN: i32 = crate::mem::align_of::<p64>() as i32;
+    vst1q_v2i64::<ALIGN>(ptr as *const i8, transmute(a))
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst1))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2f32.p0")]
+        fn _vst1_f32_x2(ptr: *mut f32, a: float32x2_t, b: float32x2_t);
+    }
+    _vst1_f32_x2(a, b.0, b.1)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst1))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4f32.p0")]
+        fn _vst1q_f32_x2(ptr: *mut f32, a: float32x4_t, b: float32x4_t);
+    }
+    _vst1q_f32_x2(a, b.0, b.1)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v2f32.p0"
+        )]
+        fn _vst1_f32_x2(a: float32x2_t, b: float32x2_t, ptr: *mut f32);
+    }
+    _vst1_f32_x2(b.0, b.1, a)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v4f32.p0"
+        )]
+        fn _vst1q_f32_x2(a: float32x4_t, b: float32x4_t, ptr: *mut f32);
+    }
+    _vst1q_f32_x2(b.0, b.1, a)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v2f32.p0"
+        )]
+        fn _vst1_f32_x3(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32);
+    }
+    _vst1_f32_x3(b.0, b.1, b.2, a)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v4f32.p0"
+        )]
+        fn _vst1q_f32_x3(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32);
+    }
+    _vst1q_f32_x3(b.0, b.1, b.2, a)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2f32.p0")]
+        fn _vst1_f32_x4(
+            ptr: *mut f32,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+        );
+    }
+    _vst1_f32_x4(a, b.0, b.1, b.2, b.3)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f32.p0")]
+        fn _vst1q_f32_x4(
+            ptr: *mut f32,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+        );
+    }
+    _vst1q_f32_x4(a, b.0, b.1, b.2, b.3)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x4.v2f32.p0"
+        )]
+        fn _vst1_f32_x4(
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            ptr: *mut f32,
+        );
+    }
+    _vst1_f32_x4(b.0, b.1, b.2, b.3, a)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x4.v4f32.p0"
+        )]
+        fn _vst1q_f32_x4(
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            ptr: *mut f32,
+        );
+    }
+    _vst1q_f32_x4(b.0, b.1, b.2, b.3, a)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushll, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
 )]
-pub fn vshll_n_u32<const N: i32>(a: uint32x2_t) -> uint64x2_t {
-    static_assert!(N >= 0 && N <= 32);
-    unsafe { simd_shl(simd_cast(a), vdupq_n_u64(N as _)) }
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst1q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushll, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -56875,21 +58450,23 @@ pub fn vshll_n_u32<const N: i32>(a: uint32x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshll_n_u8<const N: i32>(a: uint8x8_t) -> uint16x8_t {
-    static_assert!(N >= 0 && N <= 8);
-    unsafe { simd_shl(simd_cast(a), vdupq_n_u16(N as _)) }
+pub unsafe fn vst1_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -56898,22 +58475,23 @@ pub fn vshll_n_u8<const N: i32>(a: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    let n: i32 = if N == 8 { 7 } else { N };
-    unsafe { simd_shr(a, vdup_n_s8(n as _)) }
+pub unsafe fn vst1q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -56922,22 +58500,23 @@ pub fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    let n: i32 = if N == 8 { 7 } else { N };
-    unsafe { simd_shr(a, vdupq_n_s8(n as _)) }
+pub unsafe fn vst1_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -56946,22 +58525,23 @@ pub fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    let n: i32 = if N == 16 { 15 } else { N };
-    unsafe { simd_shr(a, vdup_n_s16(n as _)) }
+pub unsafe fn vst1q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16_t) {
+    static_assert_uimm_bits!(LANE, 4);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -56970,22 +58550,23 @@ pub fn vshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    let n: i32 = if N == 16 { 15 } else { N };
-    unsafe { simd_shr(a, vdupq_n_s16(n as _)) }
+pub unsafe fn vst1_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -56994,22 +58575,23 @@ pub fn vshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    let n: i32 = if N == 32 { 31 } else { N };
-    unsafe { simd_shr(a, vdup_n_s32(n as _)) }
+pub unsafe fn vst1q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57018,22 +58600,23 @@ pub fn vshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    let n: i32 = if N == 32 { 31 } else { N };
-    unsafe { simd_shr(a, vdupq_n_s32(n as _)) }
+pub unsafe fn vst1_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57042,22 +58625,23 @@ pub fn vshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    let n: i32 = if N == 64 { 63 } else { N };
-    unsafe { simd_shr(a, vdup_n_s64(n as _)) }
+pub unsafe fn vst1q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sshr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57066,22 +58650,23 @@ pub fn vshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    let n: i32 = if N == 64 { 63 } else { N };
-    unsafe { simd_shr(a, vdupq_n_s64(n as _)) }
+pub unsafe fn vst1q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57090,26 +58675,23 @@ pub fn vshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    let n: i32 = if N == 8 {
-        return vdup_n_u8(0);
-    } else {
-        N
-    };
-    unsafe { simd_shr(a, vdup_n_u8(n as _)) }
+pub unsafe fn vst1_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57118,26 +58700,23 @@ pub fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    let n: i32 = if N == 8 {
-        return vdupq_n_u8(0);
-    } else {
-        N
-    };
-    unsafe { simd_shr(a, vdupq_n_u8(n as _)) }
+pub unsafe fn vst1q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16_t) {
+    static_assert_uimm_bits!(LANE, 4);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57146,26 +58725,23 @@ pub fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    let n: i32 = if N == 16 {
-        return vdup_n_u16(0);
-    } else {
-        N
-    };
-    unsafe { simd_shr(a, vdup_n_u16(n as _)) }
+pub unsafe fn vst1_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57174,26 +58750,23 @@ pub fn vshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    let n: i32 = if N == 16 {
-        return vdupq_n_u16(0);
-    } else {
-        N
-    };
-    unsafe { simd_shr(a, vdupq_n_u16(n as _)) }
+pub unsafe fn vst1q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57202,26 +58775,23 @@ pub fn vshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    let n: i32 = if N == 32 {
-        return vdup_n_u32(0);
-    } else {
-        N
-    };
-    unsafe { simd_shr(a, vdup_n_u32(n as _)) }
+pub unsafe fn vst1_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57230,26 +58800,23 @@ pub fn vshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    let n: i32 = if N == 32 {
-        return vdupq_n_u32(0);
-    } else {
-        N
-    };
-    unsafe { simd_shr(a, vdupq_n_u32(n as _)) }
+pub unsafe fn vst1q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57258,26 +58825,23 @@ pub fn vshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    let n: i32 = if N == 64 {
-        return vdup_n_u64(0);
-    } else {
-        N
-    };
-    unsafe { simd_shr(a, vdup_n_u64(n as _)) }
+pub unsafe fn vst1q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ushr, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57286,26 +58850,23 @@ pub fn vshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    let n: i32 = if N == 64 {
-        return vdupq_n_u64(0);
-    } else {
-        N
-    };
-    unsafe { simd_shr(a, vdupq_n_u64(n as _)) }
+pub unsafe fn vst1_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shrn, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57314,21 +58875,23 @@ pub fn vshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_cast(simd_shr(a, vdupq_n_s16(N as _))) }
+pub unsafe fn vst1q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16_t) {
+    static_assert_uimm_bits!(LANE, 4);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shrn, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57337,21 +58900,23 @@ pub fn vshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_cast(simd_shr(a, vdupq_n_s32(N as _))) }
+pub unsafe fn vst1_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shrn, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57360,21 +58925,23 @@ pub fn vshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_cast(simd_shr(a, vdupq_n_s64(N as _))) }
+pub unsafe fn vst1q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shrn, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57383,21 +58950,23 @@ pub fn vshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_cast(simd_shr(a, vdupq_n_u16(N as _))) }
+pub unsafe fn vst1_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1_t) {
+    static_assert!(LANE == 0);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shrn, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57406,21 +58975,23 @@ pub fn vshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_cast(simd_shr(a, vdupq_n_u32(N as _))) }
+pub unsafe fn vst1_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1_t) {
+    static_assert!(LANE == 0);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shrn, N = 2)
+    assert_instr(nop, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -57429,1739 +59000,1437 @@ pub fn vshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_cast(simd_shr(a, vdupq_n_u64(N as _))) }
+pub unsafe fn vst1_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1_t) {
+    static_assert!(LANE == 0);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    vshiftlins_v8i8::<N>(a, b)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) {
+    vst1_s64_x2(transmute(a), transmute(b))
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    vshiftlins_v16i8::<N>(a, b)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) {
+    vst1_s64_x3(transmute(a), transmute(b))
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    vshiftlins_v4i16::<N>(a, b)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) {
+    vst1_s64_x4(transmute(a), transmute(b))
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    vshiftlins_v8i16::<N>(a, b)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) {
+    vst1q_s64_x2(transmute(a), transmute(b))
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 0 && N <= 31);
-    vshiftlins_v2i32::<N>(a, b)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) {
+    vst1q_s64_x3(transmute(a), transmute(b))
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 0 && N <= 31);
-    vshiftlins_v4i32::<N>(a, b)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) {
+    vst1q_s64_x4(transmute(a), transmute(b))
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N >= 0 && N <= 63);
-    vshiftlins_v1i64::<N>(a, b)
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v8i8.p0"
+        )]
+        fn _vst1_s8_x2(a: int8x8_t, b: int8x8_t, ptr: *mut i8);
+    }
+    _vst1_s8_x2(b.0, b.1, a)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(N >= 0 && N <= 63);
-    vshiftlins_v2i64::<N>(a, b)
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v16i8.p0"
+        )]
+        fn _vst1q_s8_x2(a: int8x16_t, b: int8x16_t, ptr: *mut i8);
+    }
+    _vst1q_s8_x2(b.0, b.1, a)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { transmute(vshiftlins_v8i8::<N>(transmute(a), transmute(b))) }
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v4i16.p0"
+        )]
+        fn _vst1_s16_x2(a: int16x4_t, b: int16x4_t, ptr: *mut i16);
+    }
+    _vst1_s16_x2(b.0, b.1, a)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { transmute(vshiftlins_v16i8::<N>(transmute(a), transmute(b))) }
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v8i16.p0"
+        )]
+        fn _vst1q_s16_x2(a: int16x8_t, b: int16x8_t, ptr: *mut i16);
+    }
+    _vst1q_s16_x2(b.0, b.1, a)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { transmute(vshiftlins_v4i16::<N>(transmute(a), transmute(b))) }
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v2i32.p0"
+        )]
+        fn _vst1_s32_x2(a: int32x2_t, b: int32x2_t, ptr: *mut i32);
+    }
+    _vst1_s32_x2(b.0, b.1, a)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { transmute(vshiftlins_v8i16::<N>(transmute(a), transmute(b))) }
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v4i32.p0"
+        )]
+        fn _vst1q_s32_x2(a: int32x4_t, b: int32x4_t, ptr: *mut i32);
+    }
+    _vst1q_s32_x2(b.0, b.1, a)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v1i64.p0"
+        )]
+        fn _vst1_s64_x2(a: int64x1_t, b: int64x1_t, ptr: *mut i64);
+    }
+    _vst1_s64_x2(b.0, b.1, a)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v2i64.p0"
+        )]
+        fn _vst1q_s64_x2(a: int64x2_t, b: int64x2_t, ptr: *mut i64);
+    }
+    _vst1q_s64_x2(b.0, b.1, a)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 0 && N <= 31);
-    unsafe { transmute(vshiftlins_v2i32::<N>(transmute(a), transmute(b))) }
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i8.p0")]
+        fn _vst1_s8_x2(ptr: *mut i8, a: int8x8_t, b: int8x8_t);
+    }
+    _vst1_s8_x2(a, b.0, b.1)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 0 && N <= 31);
-    unsafe { transmute(vshiftlins_v4i32::<N>(transmute(a), transmute(b))) }
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v16i8.p0")]
+        fn _vst1q_s8_x2(ptr: *mut i8, a: int8x16_t, b: int8x16_t);
+    }
+    _vst1q_s8_x2(a, b.0, b.1)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 0 && N <= 63);
-    unsafe { transmute(vshiftlins_v1i64::<N>(transmute(a), transmute(b))) }
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i16.p0")]
+        fn _vst1_s16_x2(ptr: *mut i16, a: int16x4_t, b: int16x4_t);
+    }
+    _vst1_s16_x2(a, b.0, b.1)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(N >= 0 && N <= 63);
-    unsafe { transmute(vshiftlins_v2i64::<N>(transmute(a), transmute(b))) }
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i16.p0")]
+        fn _vst1q_s16_x2(ptr: *mut i16, a: int16x8_t, b: int16x8_t);
+    }
+    _vst1q_s16_x2(a, b.0, b.1)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { transmute(vshiftlins_v8i8::<N>(transmute(a), transmute(b))) }
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i32.p0")]
+        fn _vst1_s32_x2(ptr: *mut i32, a: int32x2_t, b: int32x2_t);
+    }
+    _vst1_s32_x2(a, b.0, b.1)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe { transmute(vshiftlins_v16i8::<N>(transmute(a), transmute(b))) }
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i32.p0")]
+        fn _vst1q_s32_x2(ptr: *mut i32, a: int32x4_t, b: int32x4_t);
+    }
+    _vst1q_s32_x2(a, b.0, b.1)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { transmute(vshiftlins_v4i16::<N>(transmute(a), transmute(b))) }
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v1i64.p0")]
+        fn _vst1_s64_x2(ptr: *mut i64, a: int64x1_t, b: int64x1_t);
+    }
+    _vst1_s64_x2(a, b.0, b.1)
 }
-#[doc = "Shift Left and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe { transmute(vshiftlins_v8i16::<N>(transmute(a), transmute(b))) }
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i64.p0")]
+        fn _vst1q_s64_x2(ptr: *mut i64, a: int64x2_t, b: int64x2_t);
+    }
+    _vst1q_s64_x2(a, b.0, b.1)
 }
-#[doc = "Signed shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_add(a, vshr_n_s8::<N>(b)) }
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v8i8.p0"
+        )]
+        fn _vst1_s8_x3(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8);
+    }
+    _vst1_s8_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Signed shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_add(a, vshrq_n_s8::<N>(b)) }
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v16i8.p0"
+        )]
+        fn _vst1q_s8_x3(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8);
+    }
+    _vst1q_s8_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Signed shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_add(a, vshr_n_s16::<N>(b)) }
-}
-#[doc = "Signed shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_add(a, vshrq_n_s16::<N>(b)) }
-}
-#[doc = "Signed shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_add(a, vshr_n_s32::<N>(b)) }
-}
-#[doc = "Signed shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_add(a, vshrq_n_s32::<N>(b)) }
-}
-#[doc = "Signed shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe { simd_add(a, vshr_n_s64::<N>(b)) }
-}
-#[doc = "Signed shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe { simd_add(a, vshrq_n_s64::<N>(b)) }
-}
-#[doc = "Unsigned shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_add(a, vshr_n_u8::<N>(b)) }
-}
-#[doc = "Unsigned shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_add(a, vshrq_n_u8::<N>(b)) }
-}
-#[doc = "Unsigned shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_add(a, vshr_n_u16::<N>(b)) }
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v4i16.p0"
+        )]
+        fn _vst1_s16_x3(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16);
+    }
+    _vst1_s16_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Unsigned shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_add(a, vshrq_n_u16::<N>(b)) }
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v8i16.p0"
+        )]
+        fn _vst1q_s16_x3(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i16);
+    }
+    _vst1q_s16_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Unsigned shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_add(a, vshr_n_u32::<N>(b)) }
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v2i32.p0"
+        )]
+        fn _vst1_s32_x3(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32);
+    }
+    _vst1_s32_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Unsigned shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_add(a, vshrq_n_u32::<N>(b)) }
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v4i32.p0"
+        )]
+        fn _vst1q_s32_x3(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32);
+    }
+    _vst1q_s32_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Unsigned shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe { simd_add(a, vshr_n_u64::<N>(b)) }
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v1i64.p0"
+        )]
+        fn _vst1_s64_x3(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64);
+    }
+    _vst1_s64_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Unsigned shift right and accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usra, N = 2)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe { simd_add(a, vshrq_n_u64::<N>(b)) }
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v2i64.p0"
+        )]
+        fn _vst1q_s64_x3(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64);
+    }
+    _vst1q_s64_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert!(1 <= N && N <= 8);
-    vshiftrins_v8i8::<N>(a, b)
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i8.p0")]
+        fn _vst1_s8_x3(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t);
+    }
+    _vst1_s8_x3(a, b.0, b.1, b.2)
 }
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert!(1 <= N && N <= 8);
-    vshiftrins_v16i8::<N>(a, b)
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v16i8.p0")]
+        fn _vst1q_s8_x3(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t);
+    }
+    _vst1q_s8_x3(a, b.0, b.1, b.2)
 }
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert!(1 <= N && N <= 16);
-    vshiftrins_v4i16::<N>(a, b)
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i16.p0")]
+        fn _vst1_s16_x3(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t);
+    }
+    _vst1_s16_x3(a, b.0, b.1, b.2)
 }
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert!(1 <= N && N <= 16);
-    vshiftrins_v8i16::<N>(a, b)
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i16.p0")]
+        fn _vst1q_s16_x3(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t);
+    }
+    _vst1q_s16_x3(a, b.0, b.1, b.2)
 }
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(1 <= N && N <= 32);
-    vshiftrins_v2i32::<N>(a, b)
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i32.p0")]
+        fn _vst1_s32_x3(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t);
+    }
+    _vst1_s32_x3(a, b.0, b.1, b.2)
 }
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(1 <= N && N <= 32);
-    vshiftrins_v4i32::<N>(a, b)
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i32.p0")]
+        fn _vst1q_s32_x3(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t);
+    }
+    _vst1q_s32_x3(a, b.0, b.1, b.2)
 }
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(1 <= N && N <= 64);
-    vshiftrins_v1i64::<N>(a, b)
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v1i64.p0")]
+        fn _vst1_s64_x3(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t);
+    }
+    _vst1_s64_x3(a, b.0, b.1, b.2)
 }
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsriq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(1 <= N && N <= 64);
-    vshiftrins_v2i64::<N>(a, b)
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsri_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert!(1 <= N && N <= 8);
-    unsafe { transmute(vshiftrins_v8i8::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsriq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert!(1 <= N && N <= 8);
-    unsafe { transmute(vshiftrins_v16i8::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsri_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert!(1 <= N && N <= 16);
-    unsafe { transmute(vshiftrins_v4i16::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsriq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert!(1 <= N && N <= 16);
-    unsafe { transmute(vshiftrins_v8i16::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsri_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(1 <= N && N <= 32);
-    unsafe { transmute(vshiftrins_v2i32::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsriq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(1 <= N && N <= 32);
-    unsafe { transmute(vshiftrins_v4i32::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsri_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(1 <= N && N <= 64);
-    unsafe { transmute(vshiftrins_v1i64::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsriq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(1 <= N && N <= 64);
-    unsafe { transmute(vshiftrins_v2i64::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsri_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    static_assert!(1 <= N && N <= 8);
-    unsafe { transmute(vshiftrins_v8i8::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsriq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    static_assert!(1 <= N && N <= 8);
-    unsafe { transmute(vshiftrins_v16i8::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsri_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    static_assert!(1 <= N && N <= 16);
-    unsafe { transmute(vshiftrins_v4i16::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    static_assert!(1 <= N && N <= 16);
-    unsafe { transmute(vshiftrins_v8i16::<N>(transmute(a), transmute(b))) }
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) {
-    vst1_v4f16(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<f16>() as i32,
-    )
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) {
-    vst1q_v8f16(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<f16>() as i32,
-    )
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i64.p0")]
+        fn _vst1q_s64_x3(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t);
+    }
+    _vst1q_s64_x3(a, b.0, b.1, b.2)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(vst1))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v4f16")]
-        fn _vst1_f16_x2(ptr: *mut f16, a: float16x4_t, b: float16x4_t);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x4.v8i8.p0"
+        )]
+        fn _vst1_s8_x4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8);
     }
-    _vst1_f16_x2(a, b.0, b.1)
+    _vst1_s8_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(vst1))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0.v8f16")]
-        fn _vst1q_f16_x2(ptr: *mut f16, a: float16x8_t, b: float16x8_t);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x4.v16i8.p0"
+        )]
+        fn _vst1q_s8_x4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8);
     }
-    _vst1q_f16_x2(a, b.0, b.1)
+    _vst1q_s8_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st1))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) {
+pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v4f16.p0"
+            link_name = "llvm.aarch64.neon.st1x4.v4i16.p0"
         )]
-        fn _vst1_f16_x2(a: float16x4_t, b: float16x4_t, ptr: *mut f16);
+        fn _vst1_s16_x4(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i16);
     }
-    _vst1_f16_x2(b.0, b.1, a)
+    _vst1_s16_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st1))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) {
+pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v8f16.p0"
+            link_name = "llvm.aarch64.neon.st1x4.v8i16.p0"
         )]
-        fn _vst1q_f16_x2(a: float16x8_t, b: float16x8_t, ptr: *mut f16);
+        fn _vst1q_s16_x4(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16);
     }
-    _vst1q_f16_x2(b.0, b.1, a)
+    _vst1q_s16_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(vst1))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4f16")]
-        fn _vst1_f16_x3(ptr: *mut f16, a: float16x4_t, b: float16x4_t, c: float16x4_t);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x4.v2i32.p0"
+        )]
+        fn _vst1_s32_x4(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32);
     }
-    _vst1_f16_x3(a, b.0, b.1, b.2)
+    _vst1_s32_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(vst1))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8f16")]
-        fn _vst1q_f16_x3(ptr: *mut f16, a: float16x8_t, b: float16x8_t, c: float16x8_t);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x4.v4i32.p0"
+        )]
+        fn _vst1q_s32_x4(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32);
     }
-    _vst1q_f16_x3(a, b.0, b.1, b.2)
+    _vst1q_s32_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st1))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) {
+pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v4f16.p0"
+            link_name = "llvm.aarch64.neon.st1x4.v1i64.p0"
         )]
-        fn _vst1_f16_x3(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut f16);
+        fn _vst1_s64_x4(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64);
     }
-    _vst1_f16_x3(b.0, b.1, b.2, a)
+    _vst1_s64_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st1))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) {
+pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v8f16.p0"
+            link_name = "llvm.aarch64.neon.st1x4.v2i64.p0"
         )]
-        fn _vst1q_f16_x3(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut f16);
+        fn _vst1q_s64_x4(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i64);
     }
-    _vst1q_f16_x3(b.0, b.1, b.2, a)
+    _vst1q_s64_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) {
+pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f16")]
-        fn _vst1_f16_x4(
-            ptr: *mut f16,
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            d: float16x4_t,
-        );
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i8.p0")]
+        fn _vst1_s8_x4(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t);
     }
-    _vst1_f16_x4(a, b.0, b.1, b.2, b.3)
+    _vst1_s8_x4(a, b.0, b.1, b.2, b.3)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) {
+pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8f16")]
-        fn _vst1q_f16_x4(
-            ptr: *mut f16,
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            d: float16x8_t,
-        );
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v16i8.p0")]
+        fn _vst1q_s8_x4(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t);
     }
-    _vst1q_f16_x4(a, b.0, b.1, b.2, b.3)
+    _vst1q_s8_x4(a, b.0, b.1, b.2, b.3)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(st1))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v4f16.p0"
-        )]
-        fn _vst1_f16_x4(
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            d: float16x4_t,
-            ptr: *mut f16,
-        );
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i16.p0")]
+        fn _vst1_s16_x4(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t);
     }
-    _vst1_f16_x4(b.0, b.1, b.2, b.3, a)
+    _vst1_s16_x4(a, b.0, b.1, b.2, b.3)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(st1))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v8f16.p0"
-        )]
-        fn _vst1q_f16_x4(
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            d: float16x8_t,
-            ptr: *mut f16,
-        );
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i16.p0")]
+        fn _vst1q_s16_x4(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t);
     }
-    _vst1q_f16_x4(b.0, b.1, b.2, b.3, a)
+    _vst1q_s16_x4(a, b.0, b.1, b.2, b.3)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
-pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
-    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
-    vst1_v2f32::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i32.p0")]
+        fn _vst1_s32_x4(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t);
+    }
+    _vst1_s32_x4(a, b.0, b.1, b.2, b.3)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
-pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
-    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
-    vst1q_v4f32::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i32.p0")]
+        fn _vst1q_s32_x4(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t);
+    }
+    _vst1q_s32_x4(a, b.0, b.1, b.2, b.3)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
-pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
-    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
-    vst1_v8i8::<ALIGN>(ptr as *const i8, a)
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v1i64.p0")]
+        fn _vst1_s64_x4(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t);
+    }
+    _vst1_s64_x4(a, b.0, b.1, b.2, b.3)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
-pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
-    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
-    vst1q_v16i8::<ALIGN>(ptr as *const i8, a)
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i64.p0")]
+        fn _vst1q_s64_x4(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t);
+    }
+    _vst1q_s64_x4(a, b.0, b.1, b.2, b.3)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
-    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
-    vst1_v4i16::<ALIGN>(ptr as *const i8, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) {
+    vst1_s8_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
-    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
-    vst1q_v8i16::<ALIGN>(ptr as *const i8, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) {
+    vst1_s8_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
-pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
-    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
-    vst1_v2i32::<ALIGN>(ptr as *const i8, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) {
+    vst1_s8_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
-pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
-    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
-    vst1q_v4i32::<ALIGN>(ptr as *const i8, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) {
+    vst1q_s8_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
-pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
-    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
-    vst1_v1i64::<ALIGN>(ptr as *const i8, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) {
+    vst1q_s8_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
-pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
-    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
-    vst1q_v2i64::<ALIGN>(ptr as *const i8, a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) {
+    vst1q_s8_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
-pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
-    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
-    vst1_v8i8::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) {
+    vst1_s16_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
-pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
-    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
-    vst1q_v16i8::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) {
+    vst1_s16_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
-    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
-    vst1_v4i16::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) {
+    vst1_s16_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
-    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
-    vst1q_v8i16::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) {
+    vst1q_s16_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
-pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
-    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
-    vst1_v2i32::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) {
+    vst1q_s16_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
-pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
-    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
-    vst1q_v4i32::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) {
+    vst1q_s16_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
-pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
-    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
-    vst1_v1i64::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) {
+    vst1_s32_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
-pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
-    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
-    vst1q_v2i64::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) {
+    vst1_s32_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
-pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
-    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
-    vst1_v8i8::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) {
+    vst1_s32_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
-pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
-    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
-    vst1q_v16i8::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) {
+    vst1q_s32_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
-    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
-    vst1_v4i16::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) {
+    vst1q_s32_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
-    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
-    vst1q_v8i16::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) {
+    vst1q_s32_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
-pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
-    const ALIGN: i32 = crate::mem::align_of::<p64>() as i32;
-    vst1_v1i64::<ALIGN>(ptr as *const i8, transmute(a))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) {
+    vst1_s64_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
-pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
-    const ALIGN: i32 = crate::mem::align_of::<p64>() as i32;
-    vst1q_v2i64::<ALIGN>(ptr as *const i8, transmute(a))
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2f32.p0")]
-        fn _vst1_f32_x2(ptr: *mut f32, a: float32x2_t, b: float32x2_t);
-    }
-    _vst1_f32_x2(a, b.0, b.1)
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4f32.p0")]
-        fn _vst1q_f32_x2(ptr: *mut f32, a: float32x4_t, b: float32x4_t);
-    }
-    _vst1q_f32_x2(a, b.0, b.1)
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v2f32.p0"
-        )]
-        fn _vst1_f32_x2(a: float32x2_t, b: float32x2_t, ptr: *mut f32);
-    }
-    _vst1_f32_x2(b.0, b.1, a)
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v4f32.p0"
-        )]
-        fn _vst1q_f32_x2(a: float32x4_t, b: float32x4_t, ptr: *mut f32);
-    }
-    _vst1q_f32_x2(b.0, b.1, a)
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v2f32.p0"
-        )]
-        fn _vst1_f32_x3(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32);
-    }
-    _vst1_f32_x3(b.0, b.1, b.2, a)
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v4f32.p0"
-        )]
-        fn _vst1q_f32_x3(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32);
-    }
-    _vst1q_f32_x3(b.0, b.1, b.2, a)
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2f32.p0")]
-        fn _vst1_f32_x4(
-            ptr: *mut f32,
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-        );
-    }
-    _vst1_f32_x4(a, b.0, b.1, b.2, b.3)
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4f32.p0")]
-        fn _vst1q_f32_x4(
-            ptr: *mut f32,
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-        );
-    }
-    _vst1q_f32_x4(a, b.0, b.1, b.2, b.3)
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v2f32.p0"
-        )]
-        fn _vst1_f32_x4(
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-            ptr: *mut f32,
-        );
-    }
-    _vst1_f32_x4(b.0, b.1, b.2, b.3, a)
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v4f32.p0"
-        )]
-        fn _vst1q_f32_x4(
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-            ptr: *mut f32,
-        );
-    }
-    _vst1q_f32_x4(b.0, b.1, b.2, b.3, a)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    *a = simd_extract!(b, LANE as u32);
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst1q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    *a = simd_extract!(b, LANE as u32);
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59170,23 +60439,21 @@ pub unsafe fn vst1q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) {
+    vst1_s64_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59195,23 +60462,21 @@ pub unsafe fn vst1_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) {
+    vst1_s64_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59220,23 +60485,21 @@ pub unsafe fn vst1q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) {
+    vst1q_s64_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59245,23 +60508,21 @@ pub unsafe fn vst1_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16_t) {
-    static_assert_uimm_bits!(LANE, 4);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) {
+    vst1q_s64_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59270,23 +60531,21 @@ pub unsafe fn vst1q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) {
+    vst1q_s64_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59295,23 +60554,21 @@ pub unsafe fn vst1_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) {
+    vst1_s8_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59320,23 +60577,21 @@ pub unsafe fn vst1q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) {
+    vst1_s8_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59345,23 +60600,21 @@ pub unsafe fn vst1_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) {
+    vst1_s8_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59370,23 +60623,21 @@ pub unsafe fn vst1q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) {
+    vst1q_s8_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59395,23 +60646,21 @@ pub unsafe fn vst1q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) {
+    vst1q_s8_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59420,23 +60669,21 @@ pub unsafe fn vst1_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16_t) {
-    static_assert_uimm_bits!(LANE, 4);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) {
+    vst1q_s8_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59445,23 +60692,21 @@ pub unsafe fn vst1q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) {
+    vst1_s16_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59470,23 +60715,21 @@ pub unsafe fn vst1_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) {
+    vst1_s16_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59495,23 +60738,21 @@ pub unsafe fn vst1q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) {
+    vst1_s16_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59520,23 +60761,21 @@ pub unsafe fn vst1_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) {
+    vst1q_s16_x2(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u64)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59545,23 +60784,21 @@ pub unsafe fn vst1q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) {
+    vst1q_s16_x3(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
+    assert_instr(st1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59570,287 +60807,198 @@ pub unsafe fn vst1q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    *a = simd_extract!(b, LANE as u32);
+pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) {
+    vst1q_s16_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16_t) {
-    static_assert_uimm_bits!(LANE, 4);
-    *a = simd_extract!(b, LANE as u32);
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+unsafe fn vst1_v1i64<const ALIGN: i32>(addr: *const i8, val: int64x1_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")]
+        fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32);
+    }
+    _vst1_v1i64(addr, val, ALIGN)
+}
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    *a = simd_extract!(b, LANE as u32);
+unsafe fn vst1_v2f32<const ALIGN: i32>(addr: *const i8, val: float32x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")]
+        fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32);
+    }
+    _vst1_v2f32(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    *a = simd_extract!(b, LANE as u32);
+unsafe fn vst1_v2i32<const ALIGN: i32>(addr: *const i8, val: int32x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")]
+        fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32);
+    }
+    _vst1_v2i32(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1_t) {
-    static_assert!(LANE == 0);
-    *a = simd_extract!(b, LANE as u32);
+unsafe fn vst1_v4i16<const ALIGN: i32>(addr: *const i8, val: int16x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")]
+        fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32);
+    }
+    _vst1_v4i16(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1_t) {
-    static_assert!(LANE == 0);
-    *a = simd_extract!(b, LANE as u32);
+unsafe fn vst1_v8i8<const ALIGN: i32>(addr: *const i8, val: int8x8_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")]
+        fn _vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32);
+    }
+    _vst1_v8i8(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1_t) {
-    static_assert!(LANE == 0);
-    *a = simd_extract!(b, LANE as u32);
+unsafe fn vst1q_v16i8<const ALIGN: i32>(addr: *const i8, val: int8x16_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")]
+        fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32);
+    }
+    _vst1q_v16i8(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) {
-    vst1_s64_x2(transmute(a), transmute(b))
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1q_v2i64<const ALIGN: i32>(addr: *const i8, val: int64x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")]
+        fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32);
+    }
+    _vst1q_v2i64(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) {
-    vst1_s64_x3(transmute(a), transmute(b))
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1q_v4f32<const ALIGN: i32>(addr: *const i8, val: float32x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f32.p0")]
+        fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32);
+    }
+    _vst1q_v4f32(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) {
-    vst1_s64_x4(transmute(a), transmute(b))
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1q_v4i32<const ALIGN: i32>(addr: *const i8, val: int32x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")]
+        fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32);
+    }
+    _vst1q_v4i32(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x2)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1q_v8i16<const ALIGN: i32>(addr: *const i8, val: int16x8_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")]
+        fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32);
+    }
+    _vst1q_v8i16(addr, val, ALIGN)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) {
-    vst1q_s64_x2(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f16.p0")]
+        fn _vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32);
+    }
+    _vst1_v4f16(addr, val, align)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x3)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) {
-    vst1q_s64_x3(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8f16.p0")]
+        fn _vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32);
+    }
+    _vst1q_v8f16(addr, val, align)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x4)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(nop, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -59859,861 +61007,688 @@ pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) {
-    vst1q_s64_x4(transmute(a), transmute(b))
+pub unsafe fn vst1q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v8i8.p0"
+            link_name = "llvm.aarch64.neon.st2.v4f16.p0"
         )]
-        fn _vst1_s8_x2(a: int8x8_t, b: int8x8_t, ptr: *mut i8);
+        fn _vst2_f16(a: float16x4_t, b: float16x4_t, ptr: *mut i8);
     }
-    _vst1_s8_x2(b.0, b.1, a)
+    _vst2_f16(b.0, b.1, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v16i8.p0"
+            link_name = "llvm.aarch64.neon.st2.v8f16.p0"
         )]
-        fn _vst1q_s8_x2(a: int8x16_t, b: int8x16_t, ptr: *mut i8);
+        fn _vst2q_f16(a: float16x8_t, b: float16x8_t, ptr: *mut i8);
     }
-    _vst1q_s8_x2(b.0, b.1, a)
+    _vst2q_f16(b.0, b.1, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) {
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v4i16.p0"
-        )]
-        fn _vst1_s16_x2(a: int16x4_t, b: int16x4_t, ptr: *mut i16);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v4f16")]
+        fn _vst2_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, size: i32);
     }
-    _vst1_s16_x2(b.0, b.1, a)
+    _vst2_f16(a as _, b.0, b.1, 2)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) {
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v8i16.p0"
-        )]
-        fn _vst1q_s16_x2(a: int16x8_t, b: int16x8_t, ptr: *mut i16);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v8f16")]
+        fn _vst2q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, size: i32);
     }
-    _vst1q_s16_x2(b.0, b.1, a)
+    _vst2q_f16(a as _, b.0, b.1, 2)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v2i32.p0"
-        )]
-        fn _vst1_s32_x2(a: int32x2_t, b: int32x2_t, ptr: *mut i32);
-    }
-    _vst1_s32_x2(b.0, b.1, a)
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
+    crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v4i32.p0"
-        )]
-        fn _vst1q_s32_x2(a: int32x4_t, b: int32x4_t, ptr: *mut i32);
-    }
-    _vst1q_s32_x2(b.0, b.1, a)
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
+    crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v1i64.p0"
-        )]
-        fn _vst1_s64_x2(a: int64x1_t, b: int64x1_t, ptr: *mut i64);
-    }
-    _vst1_s64_x2(b.0, b.1, a)
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
+    crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v2i64.p0"
-        )]
-        fn _vst1q_s64_x2(a: int64x2_t, b: int64x2_t, ptr: *mut i64);
-    }
-    _vst1q_s64_x2(b.0, b.1, a)
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
+    crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i8.p0")]
-        fn _vst1_s8_x2(ptr: *mut i8, a: int8x8_t, b: int8x8_t);
-    }
-    _vst1_s8_x2(a, b.0, b.1)
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
+    crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
+    crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b)
+}
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
+    crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b)
+}
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
+    crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b)
+}
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) {
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v16i8.p0")]
-        fn _vst1q_s8_x2(ptr: *mut i8, a: int8x16_t, b: int8x16_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2f32.p0")]
+        fn _vst2_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32);
     }
-    _vst1q_s8_x2(a, b.0, b.1)
+    _vst2_f32(a as _, b.0, b.1, 4)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) {
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i16.p0")]
-        fn _vst1_s16_x2(ptr: *mut i16, a: int16x4_t, b: int16x4_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4f32.p0")]
+        fn _vst2q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32);
     }
-    _vst1_s16_x2(a, b.0, b.1)
+    _vst2q_f32(a as _, b.0, b.1, 4)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) {
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i16.p0")]
-        fn _vst1q_s16_x2(ptr: *mut i16, a: int16x8_t, b: int16x8_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i8.p0")]
+        fn _vst2_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32);
     }
-    _vst1q_s16_x2(a, b.0, b.1)
+    _vst2_s8(a as _, b.0, b.1, 1)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) {
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i32.p0")]
-        fn _vst1_s32_x2(ptr: *mut i32, a: int32x2_t, b: int32x2_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v16i8.p0")]
+        fn _vst2q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32);
     }
-    _vst1_s32_x2(a, b.0, b.1)
+    _vst2q_s8(a as _, b.0, b.1, 1)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) {
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i32.p0")]
-        fn _vst1q_s32_x2(ptr: *mut i32, a: int32x4_t, b: int32x4_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i16.p0")]
+        fn _vst2_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32);
     }
-    _vst1q_s32_x2(a, b.0, b.1)
+    _vst2_s16(a as _, b.0, b.1, 2)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) {
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v1i64.p0")]
-        fn _vst1_s64_x2(ptr: *mut i64, a: int64x1_t, b: int64x1_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i16.p0")]
+        fn _vst2q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32);
     }
-    _vst1_s64_x2(a, b.0, b.1)
+    _vst2q_s16(a as _, b.0, b.1, 2)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2i32.p0")]
+        fn _vst2_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32);
+    }
+    _vst2_s32(a as _, b.0, b.1, 4)
+}
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) {
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i64.p0")]
-        fn _vst1q_s64_x2(ptr: *mut i64, a: int64x2_t, b: int64x2_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i32.p0")]
+        fn _vst2q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32);
     }
-    _vst1q_s64_x2(a, b.0, b.1)
+    _vst2q_s32(a as _, b.0, b.1, 4)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) {
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v8i8.p0"
+            link_name = "llvm.aarch64.neon.st2lane.v4f16.p0"
         )]
-        fn _vst1_s8_x3(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8);
+        fn _vst2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *mut i8);
     }
-    _vst1_s8_x3(b.0, b.1, b.2, a)
+    _vst2_lane_f16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) {
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v16i8.p0"
+            link_name = "llvm.aarch64.neon.st2lane.v8f16.p0"
         )]
-        fn _vst1q_s8_x3(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8);
+        fn _vst2q_lane_f16(a: float16x8_t, b: float16x8_t, n: i64, ptr: *mut i8);
     }
-    _vst1q_s8_x3(b.0, b.1, b.2, a)
+    _vst2q_lane_f16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v4f16")]
+        fn _vst2_lane_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, n: i32, size: i32);
+    }
+    _vst2_lane_f16(a as _, b.0, b.1, LANE, 2)
+}
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v8f16")]
+        fn _vst2q_lane_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, n: i32, size: i32);
+    }
+    _vst2q_lane_f16(a as _, b.0, b.1, LANE, 2)
+}
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) {
+pub unsafe fn vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v4i16.p0"
+            link_name = "llvm.aarch64.neon.st2lane.v2f32.p0"
         )]
-        fn _vst1_s16_x3(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16);
+        fn _vst2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *mut i8);
     }
-    _vst1_s16_x3(b.0, b.1, b.2, a)
+    _vst2_lane_f32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) {
+pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v8i16.p0"
+            link_name = "llvm.aarch64.neon.st2lane.v4f32.p0"
         )]
-        fn _vst1q_s16_x3(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i16);
+        fn _vst2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *mut i8);
     }
-    _vst1q_s16_x3(b.0, b.1, b.2, a)
+    _vst2q_lane_f32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) {
+pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v2i32.p0"
+            link_name = "llvm.aarch64.neon.st2lane.v8i8.p0"
         )]
-        fn _vst1_s32_x3(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32);
+        fn _vst2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *mut i8);
     }
-    _vst1_s32_x3(b.0, b.1, b.2, a)
+    _vst2_lane_s8(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) {
+pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v4i32.p0"
+            link_name = "llvm.aarch64.neon.st2lane.v4i16.p0"
         )]
-        fn _vst1q_s32_x3(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32);
+        fn _vst2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *mut i8);
     }
-    _vst1q_s32_x3(b.0, b.1, b.2, a)
+    _vst2_lane_s16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) {
+pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v1i64.p0"
+            link_name = "llvm.aarch64.neon.st2lane.v8i16.p0"
         )]
-        fn _vst1_s64_x3(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64);
+        fn _vst2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *mut i8);
     }
-    _vst1_s64_x3(b.0, b.1, b.2, a)
+    _vst2q_lane_s16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) {
+pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v2i64.p0"
+            link_name = "llvm.aarch64.neon.st2lane.v2i32.p0"
         )]
-        fn _vst1q_s64_x3(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64);
+        fn _vst2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8);
     }
-    _vst1q_s64_x3(b.0, b.1, b.2, a)
+    _vst2_lane_s32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i8.p0")]
-        fn _vst1_s8_x3(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2lane.v4i32.p0"
+        )]
+        fn _vst2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *mut i8);
     }
-    _vst1_s8_x3(a, b.0, b.1, b.2)
+    _vst2q_lane_s32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) {
+pub unsafe fn vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v16i8.p0")]
-        fn _vst1q_s8_x3(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2f32.p0")]
+        fn _vst2_lane_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32);
     }
-    _vst1q_s8_x3(a, b.0, b.1, b.2)
+    _vst2_lane_f32(a as _, b.0, b.1, LANE, 4)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) {
+pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i16.p0")]
-        fn _vst1_s16_x3(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4f32.p0")]
+        fn _vst2q_lane_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32);
     }
-    _vst1_s16_x3(a, b.0, b.1, b.2)
+    _vst2q_lane_f32(a as _, b.0, b.1, LANE, 4)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) {
+pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v8i16.p0")]
-        fn _vst1q_s16_x3(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i8.p0")]
+        fn _vst2_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32);
     }
-    _vst1q_s16_x3(a, b.0, b.1, b.2)
+    _vst2_lane_s8(a as _, b.0, b.1, LANE, 1)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) {
+pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i32.p0")]
-        fn _vst1_s32_x3(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i16.p0")]
+        fn _vst2_lane_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32);
     }
-    _vst1_s32_x3(a, b.0, b.1, b.2)
+    _vst2_lane_s16(a as _, b.0, b.1, LANE, 2)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) {
+pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v4i32.p0")]
-        fn _vst1q_s32_x3(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i16.p0")]
+        fn _vst2q_lane_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32);
     }
-    _vst1q_s32_x3(a, b.0, b.1, b.2)
+    _vst2q_lane_s16(a as _, b.0, b.1, LANE, 2)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) {
+pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v1i64.p0")]
-        fn _vst1_s64_x3(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2i32.p0")]
+        fn _vst2_lane_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32);
     }
-    _vst1_s64_x3(a, b.0, b.1, b.2)
+    _vst2_lane_s32(a as _, b.0, b.1, LANE, 4)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) {
+pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0.v2i64.p0")]
-        fn _vst1q_s64_x3(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i32.p0")]
+        fn _vst2q_lane_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32);
     }
-    _vst1q_s64_x3(a, b.0, b.1, b.2)
+    _vst2q_lane_s32(a as _, b.0, b.1, LANE, 4)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v8i8.p0"
-        )]
-        fn _vst1_s8_x4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8);
-    }
-    _vst1_s8_x4(b.0, b.1, b.2, b.3, a)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v16i8.p0"
-        )]
-        fn _vst1q_s8_x4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8);
-    }
-    _vst1q_s8_x4(b.0, b.1, b.2, b.3, a)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v4i16.p0"
-        )]
-        fn _vst1_s16_x4(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i16);
-    }
-    _vst1_s16_x4(b.0, b.1, b.2, b.3, a)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v8i16.p0"
-        )]
-        fn _vst1q_s16_x4(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16);
-    }
-    _vst1q_s16_x4(b.0, b.1, b.2, b.3, a)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v2i32.p0"
-        )]
-        fn _vst1_s32_x4(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32);
-    }
-    _vst1_s32_x4(b.0, b.1, b.2, b.3, a)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v4i32.p0"
-        )]
-        fn _vst1q_s32_x4(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32);
-    }
-    _vst1q_s32_x4(b.0, b.1, b.2, b.3, a)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v1i64.p0"
-        )]
-        fn _vst1_s64_x4(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64);
-    }
-    _vst1_s64_x4(b.0, b.1, b.2, b.3, a)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v2i64.p0"
-        )]
-        fn _vst1q_s64_x4(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i64);
-    }
-    _vst1q_s64_x4(b.0, b.1, b.2, b.3, a)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i8.p0")]
-        fn _vst1_s8_x4(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t);
-    }
-    _vst1_s8_x4(a, b.0, b.1, b.2, b.3)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v16i8.p0")]
-        fn _vst1q_s8_x4(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t);
-    }
-    _vst1q_s8_x4(a, b.0, b.1, b.2, b.3)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i16.p0")]
-        fn _vst1_s16_x4(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t);
-    }
-    _vst1_s16_x4(a, b.0, b.1, b.2, b.3)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v8i16.p0")]
-        fn _vst1q_s16_x4(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t);
-    }
-    _vst1q_s16_x4(a, b.0, b.1, b.2, b.3)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i32.p0")]
-        fn _vst1_s32_x4(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t);
-    }
-    _vst1_s32_x4(a, b.0, b.1, b.2, b.3)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v4i32.p0")]
-        fn _vst1q_s32_x4(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t);
-    }
-    _vst1q_s32_x4(a, b.0, b.1, b.2, b.3)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v1i64.p0")]
-        fn _vst1_s64_x4(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t);
-    }
-    _vst1_s64_x4(a, b.0, b.1, b.2, b.3)
-}
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst1))]
-pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0.v2i64.p0")]
-        fn _vst1q_s64_x4(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t);
-    }
-    _vst1q_s64_x4(a, b.0, b.1, b.2, b.3)
-}
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -60722,21 +61697,23 @@ pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) {
-    vst1_s8_x2(transmute(a), transmute(b))
+pub unsafe fn vst2_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst2_lane_s8::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -60745,21 +61722,23 @@ pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) {
-    vst1_s8_x3(transmute(a), transmute(b))
+pub unsafe fn vst2_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst2_lane_s16::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x4)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -60768,21 +61747,23 @@ pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) {
-    vst1_s8_x4(transmute(a), transmute(b))
+pub unsafe fn vst2q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst2q_lane_s16::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -60791,21 +61772,23 @@ pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) {
-    vst1q_s8_x2(transmute(a), transmute(b))
+pub unsafe fn vst2_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vst2_lane_s32::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -60814,21 +61797,23 @@ pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) {
-    vst1q_s8_x3(transmute(a), transmute(b))
+pub unsafe fn vst2q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst2q_lane_s32::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x4)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -60837,21 +61822,23 @@ pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) {
-    vst1q_s8_x4(transmute(a), transmute(b))
+pub unsafe fn vst2_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst2_lane_s8::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -60860,21 +61847,23 @@ pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) {
-    vst1_s16_x2(transmute(a), transmute(b))
+pub unsafe fn vst2_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst2_lane_s16::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -60883,20 +61872,21 @@ pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) {
-    vst1_s16_x3(transmute(a), transmute(b))
+pub unsafe fn vst2q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst2q_lane_s16::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x4)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -60906,20 +61896,44 @@ pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) {
-    vst1_s16_x4(transmute(a), transmute(b))
+pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) {
+    vst2_s64(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
+    core::ptr::write_unaligned(a.cast(), b)
+}
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
+    core::ptr::write_unaligned(a.cast(), b)
+}
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -60929,20 +61943,20 @@ pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) {
-    vst1q_s16_x2(transmute(a), transmute(b))
+pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) {
+    vst2_s64(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -60952,20 +61966,20 @@ pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) {
-    vst1q_s16_x3(transmute(a), transmute(b))
+pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) {
+    vst2_s8(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x4)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -60975,20 +61989,20 @@ pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) {
-    vst1q_s16_x4(transmute(a), transmute(b))
+pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) {
+    vst2q_s8(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -60998,20 +62012,20 @@ pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) {
-    vst1_s32_x2(transmute(a), transmute(b))
+pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) {
+    vst2_s16(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -61021,20 +62035,20 @@ pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) {
-    vst1_s32_x3(transmute(a), transmute(b))
+pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) {
+    vst2q_s16(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x4)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -61044,20 +62058,20 @@ pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) {
-    vst1_s32_x4(transmute(a), transmute(b))
+pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) {
+    vst2_s32(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -61067,20 +62081,20 @@ pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) {
-    vst1q_s32_x2(transmute(a), transmute(b))
+pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) {
+    vst2q_s32(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -61090,20 +62104,20 @@ pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) {
-    vst1q_s32_x3(transmute(a), transmute(b))
+pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) {
+    vst2_s8(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x4)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -61113,20 +62127,20 @@ pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) {
-    vst1q_s32_x4(transmute(a), transmute(b))
+pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) {
+    vst2q_s8(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x2)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -61136,20 +62150,20 @@ pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) {
-    vst1_s64_x2(transmute(a), transmute(b))
+pub unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t) {
+    vst2_s16(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x3)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
+    assert_instr(st2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -61159,2445 +62173,3117 @@ pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) {
-    vst1_s64_x3(transmute(a), transmute(b))
+pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) {
+    vst2q_s16(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x4)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) {
-    vst1_s64_x4(transmute(a), transmute(b))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f16")]
+        fn _vst3_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, c: float16x4_t, size: i32);
+    }
+    _vst3_f16(a as _, b.0, b.1, b.2, 2)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x2)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) {
-    vst1q_s64_x2(transmute(a), transmute(b))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8f16")]
+        fn _vst3q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, c: float16x8_t, size: i32);
+    }
+    _vst3q_f16(a as _, b.0, b.1, b.2, 2)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x3)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) {
-    vst1q_s64_x3(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3.v4f16.p0"
+        )]
+        fn _vst3_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut i8);
+    }
+    _vst3_f16(b.0, b.1, b.2, a as _)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x4)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) {
-    vst1q_s64_x4(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3.v8f16.p0"
+        )]
+        fn _vst3q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut i8);
+    }
+    _vst3q_f16(b.0, b.1, b.2, a as _)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x2)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) {
-    vst1_s8_x2(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
+    crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x3)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) {
-    vst1_s8_x3(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
+    crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x4)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) {
-    vst1_s8_x4(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
+    crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x2)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) {
-    vst1q_s8_x2(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
+    crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x3)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) {
-    vst1q_s8_x3(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
+    crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x4)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) {
-    vst1q_s8_x4(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
+    crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x2)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) {
-    vst1_s16_x2(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
+    crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x3)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) {
-    vst1_s16_x3(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
+    crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x4)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) {
-    vst1_s16_x4(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
+    crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x2)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) {
-    vst1q_s16_x2(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
+    crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x3)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) {
-    vst1q_s16_x3(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
+    crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x4)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) {
-    vst1q_s16_x4(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
+    crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b)
 }
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))]
-#[rustc_legacy_const_generics(2)]
-unsafe fn vst1_v1i64<const ALIGN: i32>(addr: *const i8, val: int64x1_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")]
-        fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32);
-    }
-    _vst1_v1i64(addr, val, ALIGN)
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
+    crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b)
 }
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
-#[rustc_legacy_const_generics(2)]
-unsafe fn vst1_v2f32<const ALIGN: i32>(addr: *const i8, val: float32x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")]
-        fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32);
-    }
-    _vst1_v2f32(addr, val, ALIGN)
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
+    crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b)
+}
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
+    crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b)
+}
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
+    crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b)
 }
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-unsafe fn vst1_v2i32<const ALIGN: i32>(addr: *const i8, val: int32x2_t) {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")]
-        fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f16")]
+        fn _vst3_lane_f16(
+            ptr: *mut i8,
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            n: i32,
+            size: i32,
+        );
     }
-    _vst1_v2i32(addr, val, ALIGN)
+    _vst3_lane_f16(a as _, b.0, b.1, b.2, LANE, 4)
 }
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-unsafe fn vst1_v4i16<const ALIGN: i32>(addr: *const i8, val: int16x4_t) {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")]
-        fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8f16")]
+        fn _vst3q_lane_f16(
+            ptr: *mut i8,
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            n: i32,
+            size: i32,
+        );
     }
-    _vst1_v4i16(addr, val, ALIGN)
+    _vst3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 4)
 }
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))]
+#[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
-unsafe fn vst1_v8i8<const ALIGN: i32>(addr: *const i8, val: int8x8_t) {
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")]
-        fn _vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v4f16.p0"
+        )]
+        fn _vst3_lane_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, n: i64, ptr: *mut i8);
     }
-    _vst1_v8i8(addr, val, ALIGN)
+    _vst3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
 }
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))]
+#[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
-unsafe fn vst1q_v16i8<const ALIGN: i32>(addr: *const i8, val: int8x16_t) {
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")]
-        fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v8f16.p0"
+        )]
+        fn _vst3q_lane_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, n: i64, ptr: *mut i8);
     }
-    _vst1q_v16i8(addr, val, ALIGN)
+    _vst3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
 }
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-unsafe fn vst1q_v2i64<const ALIGN: i32>(addr: *const i8, val: int64x2_t) {
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")]
-        fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2f32")]
+        fn _vst3_lane_f32(
+            ptr: *mut i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            n: i32,
+            size: i32,
+        );
     }
-    _vst1q_v2i64(addr, val, ALIGN)
+    _vst3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
 }
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-unsafe fn vst1q_v4f32<const ALIGN: i32>(addr: *const i8, val: float32x4_t) {
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f32.p0")]
-        fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f32")]
+        fn _vst3q_lane_f32(
+            ptr: *mut i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            n: i32,
+            size: i32,
+        );
     }
-    _vst1q_v4f32(addr, val, ALIGN)
+    _vst3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
 }
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-unsafe fn vst1q_v4i32<const ALIGN: i32>(addr: *const i8, val: int32x4_t) {
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")]
-        fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i8")]
+        fn _vst3_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32);
     }
-    _vst1q_v4i32(addr, val, ALIGN)
+    _vst3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1)
 }
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-unsafe fn vst1q_v8i16<const ALIGN: i32>(addr: *const i8, val: int16x8_t) {
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")]
-        fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i16")]
+        fn _vst3_lane_s16(
+            ptr: *mut i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            n: i32,
+            size: i32,
+        );
     }
-    _vst1q_v8i16(addr, val, ALIGN)
+    _vst3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) {
+pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f16.p0")]
-        fn _vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i16")]
+        fn _vst3q_lane_s16(
+            ptr: *mut i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            n: i32,
+            size: i32,
+        );
     }
-    _vst1_v4f16(addr, val, align)
+    _vst3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8f16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) {
+pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8f16.p0")]
-        fn _vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2i32")]
+        fn _vst3_lane_s32(
+            ptr: *mut i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            n: i32,
+            size: i32,
+        );
     }
-    _vst1q_v8f16(addr, val, align)
+    _vst3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, LANE = 0)
-)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst1q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    *a = simd_extract!(b, LANE as u32);
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i32")]
+        fn _vst3q_lane_s32(
+            ptr: *mut i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            n: i32,
+            size: i32,
+        );
+    }
+    _vst3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) {
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v4f16.p0"
+            link_name = "llvm.aarch64.neon.st3lane.v2f32.p0"
         )]
-        fn _vst2_f16(a: float16x4_t, b: float16x4_t, ptr: *mut i8);
+        fn _vst3_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *mut i8);
     }
-    _vst2_f16(b.0, b.1, a as _)
+    _vst3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v8f16.p0"
+            link_name = "llvm.aarch64.neon.st3lane.v4f32.p0"
         )]
-        fn _vst2q_f16(a: float16x8_t, b: float16x8_t, ptr: *mut i8);
+        fn _vst3q_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *mut i8);
     }
-    _vst2q_f16(b.0, b.1, a as _)
+    _vst3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(vst2))]
-pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) {
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v4f16")]
-        fn _vst2_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, size: i32);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v8i8.p0"
+        )]
+        fn _vst3_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *mut i8);
     }
-    _vst2_f16(a as _, b.0, b.1, 2)
+    _vst3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(vst2))]
-pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0.v8f16")]
-        fn _vst2q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, size: i32);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v4i16.p0"
+        )]
+        fn _vst3_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *mut i8);
     }
-    _vst2q_f16(a as _, b.0, b.1, 2)
+    _vst3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
-    crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b)
+pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v8i16.p0"
+        )]
+        fn _vst3q_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *mut i8);
+    }
+    _vst3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
-    crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b)
+pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v2i32.p0"
+        )]
+        fn _vst3_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *mut i8);
+    }
+    _vst3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
-    crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b)
+pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v4i32.p0"
+        )]
+        fn _vst3q_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *mut i8);
+    }
+    _vst3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
-    crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst3_lane_s8::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"]
-#[doc = "## Safety"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u16)"]
+#[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
-    crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst3_lane_s16::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
-    crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst3q_lane_s16::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
-    crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vst3_lane_s32::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
-    crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst3q_lane_s32::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst2))]
-pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2f32.p0")]
-        fn _vst2_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32);
-    }
-    _vst2_f32(a as _, b.0, b.1, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst3_lane_s8::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst2))]
-pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4f32.p0")]
-        fn _vst2q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32);
-    }
-    _vst2q_f32(a as _, b.0, b.1, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst3_lane_s16::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst2))]
-pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i8.p0")]
-        fn _vst2_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32);
-    }
-    _vst2_s8(a as _, b.0, b.1, 1)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst3q_lane_s16::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst2))]
-pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v16i8.p0")]
-        fn _vst2q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32);
-    }
-    _vst2q_s8(a as _, b.0, b.1, 1)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) {
+    vst3_s64(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst2))]
-pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i16.p0")]
-        fn _vst2_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32);
-    }
-    _vst2_s16(a as _, b.0, b.1, 2)
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
+    core::ptr::write_unaligned(a.cast(), b)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst2))]
-pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i16.p0")]
-        fn _vst2q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32);
-    }
-    _vst2q_s16(a as _, b.0, b.1, 2)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
+    core::ptr::write_unaligned(a.cast(), b)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst2))]
-pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2i32.p0")]
-        fn _vst2_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32);
-    }
-    _vst2_s32(a as _, b.0, b.1, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) {
+    vst3_s64(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst2))]
-pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i32.p0")]
-        fn _vst2q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32);
-    }
-    _vst2q_s32(a as _, b.0, b.1, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) {
+    vst3_s8(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v4f16.p0"
-        )]
-        fn _vst2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *mut i8);
-    }
-    _vst2_lane_f16(b.0, b.1, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) {
+    vst3q_s8(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v8f16.p0"
-        )]
-        fn _vst2q_lane_f16(a: float16x8_t, b: float16x8_t, n: i64, ptr: *mut i8);
-    }
-    _vst2q_lane_f16(b.0, b.1, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) {
+    vst3_s16(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v4f16")]
-        fn _vst2_lane_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, n: i32, size: i32);
-    }
-    _vst2_lane_f16(a as _, b.0, b.1, LANE, 2)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) {
+    vst3q_s16(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v8f16")]
-        fn _vst2q_lane_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, n: i32, size: i32);
-    }
-    _vst2q_lane_f16(a as _, b.0, b.1, LANE, 2)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) {
+    vst3_s32(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v2f32.p0"
-        )]
-        fn _vst2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *mut i8);
-    }
-    _vst2_lane_f32(b.0, b.1, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) {
+    vst3q_s32(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v4f32.p0"
-        )]
-        fn _vst2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *mut i8);
-    }
-    _vst2q_lane_f32(b.0, b.1, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) {
+    vst3_s8(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) {
+    vst3q_s8(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) {
+    vst3_s16(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st3)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) {
+    vst3q_s16(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v8i8.p0"
-        )]
-        fn _vst2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f16")]
+        fn _vst4_f16(
+            ptr: *mut i8,
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            d: float16x4_t,
+            size: i32,
+        );
     }
-    _vst2_lane_s8(b.0, b.1, LANE as i64, a as _)
+    _vst4_f16(a as _, b.0, b.1, b.2, b.3, 2)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v4i16.p0"
-        )]
-        fn _vst2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8f16")]
+        fn _vst4q_f16(
+            ptr: *mut i8,
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            d: float16x8_t,
+            size: i32,
+        );
     }
-    _vst2_lane_s16(b.0, b.1, LANE as i64, a as _)
+    _vst4q_f16(a as _, b.0, b.1, b.2, b.3, 2)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v8i16.p0"
+            link_name = "llvm.aarch64.neon.st4.v4f16.p0"
         )]
-        fn _vst2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *mut i8);
+        fn _vst4_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, d: float16x4_t, ptr: *mut i8);
     }
-    _vst2q_lane_s16(b.0, b.1, LANE as i64, a as _)
+    _vst4_f16(b.0, b.1, b.2, b.3, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v2i32.p0"
+            link_name = "llvm.aarch64.neon.st4.v8f16.p0"
         )]
-        fn _vst2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8);
+        fn _vst4q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, d: float16x8_t, ptr: *mut i8);
     }
-    _vst2_lane_s32(b.0, b.1, LANE as i64, a as _)
+    _vst4q_f16(b.0, b.1, b.2, b.3, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v4i32.p0"
-        )]
-        fn _vst2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2f32")]
+        fn _vst4_f32(
+            ptr: *mut i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            size: i32,
+        );
     }
-    _vst2q_lane_s32(b.0, b.1, LANE as i64, a as _)
+    _vst4_f32(a as _, b.0, b.1, b.2, b.3, 4)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2f32.p0")]
-        fn _vst2_lane_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f32")]
+        fn _vst4q_f32(
+            ptr: *mut i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            size: i32,
+        );
     }
-    _vst2_lane_f32(a as _, b.0, b.1, LANE, 4)
+    _vst4q_f32(a as _, b.0, b.1, b.2, b.3, 4)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4f32.p0")]
-        fn _vst2q_lane_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i8")]
+        fn _vst4_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, size: i32);
     }
-    _vst2q_lane_f32(a as _, b.0, b.1, LANE, 4)
+    _vst4_s8(a as _, b.0, b.1, b.2, b.3, 1)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i8.p0")]
-        fn _vst2_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v16i8")]
+        fn _vst4q_s8(
+            ptr: *mut i8,
+            a: int8x16_t,
+            b: int8x16_t,
+            c: int8x16_t,
+            d: int8x16_t,
+            size: i32,
+        );
     }
-    _vst2_lane_s8(a as _, b.0, b.1, LANE, 1)
+    _vst4q_s8(a as _, b.0, b.1, b.2, b.3, 1)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i16.p0")]
-        fn _vst2_lane_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i16")]
+        fn _vst4_s16(
+            ptr: *mut i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            d: int16x4_t,
+            size: i32,
+        );
     }
-    _vst2_lane_s16(a as _, b.0, b.1, LANE, 2)
+    _vst4_s16(a as _, b.0, b.1, b.2, b.3, 2)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i16.p0")]
-        fn _vst2q_lane_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i16")]
+        fn _vst4q_s16(
+            ptr: *mut i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            d: int16x8_t,
+            size: i32,
+        );
     }
-    _vst2q_lane_s16(a as _, b.0, b.1, LANE, 2)
+    _vst4q_s16(a as _, b.0, b.1, b.2, b.3, 2)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2i32.p0")]
-        fn _vst2_lane_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2i32")]
+        fn _vst4_s32(
+            ptr: *mut i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            d: int32x2_t,
+            size: i32,
+        );
     }
-    _vst2_lane_s32(a as _, b.0, b.1, LANE, 4)
+    _vst4_s32(a as _, b.0, b.1, b.2, b.3, 4)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i32.p0")]
-        fn _vst2q_lane_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i32")]
+        fn _vst4q_s32(
+            ptr: *mut i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            d: int32x4_t,
+            size: i32,
+        );
     }
-    _vst2q_lane_s32(a as _, b.0, b.1, LANE, 4)
+    _vst4q_s32(a as _, b.0, b.1, b.2, b.3, 4)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u8)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst2_lane_s8::<LANE>(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
+    crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst2_lane_s16::<LANE>(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
+    crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst2q_lane_s16::<LANE>(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
+    crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    vst2_lane_s32::<LANE>(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
+    crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst2q_lane_s32::<LANE>(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst2_lane_s8::<LANE>(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst2_lane_s16::<LANE>(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst2q_lane_s16::<LANE>(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) {
-    vst2_s64(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
-    core::ptr::write_unaligned(a.cast(), b)
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
-    core::ptr::write_unaligned(a.cast(), b)
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) {
-    vst2_s64(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) {
-    vst2_s8(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) {
-    vst2q_s8(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) {
-    vst2_s16(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) {
-    vst2q_s16(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) {
-    vst2_s32(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) {
-    vst2q_s32(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) {
-    vst2_s8(transmute(a), transmute(b))
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
+    crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p8)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) {
-    vst2q_s8(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
+    crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t) {
-    vst2_s16(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
+    crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) {
-    vst2q_s16(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
+    crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(vst3))]
-pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) {
+pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f16")]
-        fn _vst3_f16(ptr: *mut i8, a: float16x4_t, b: float16x4_t, c: float16x4_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f16")]
+        fn _vst4_lane_f16(
+            ptr: *mut i8,
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            d: float16x4_t,
+            n: i32,
+            size: i32,
+        );
     }
-    _vst3_f16(a as _, b.0, b.1, b.2, 2)
+    _vst4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(vst3))]
-pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
+pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8f16")]
-        fn _vst3q_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, c: float16x8_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8f16")]
+        fn _vst4q_lane_f16(
+            ptr: *mut i8,
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            d: float16x8_t,
+            n: i32,
+            size: i32,
+        );
     }
-    _vst3q_f16(a as _, b.0, b.1, b.2, 2)
+    _vst4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) {
+pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v4f16.p0"
+            link_name = "llvm.aarch64.neon.st4lane.v4f16.p0"
         )]
-        fn _vst3_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, ptr: *mut i8);
+        fn _vst4_lane_f16(
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            d: float16x4_t,
+            n: i64,
+            ptr: *mut i8,
+        );
     }
-    _vst3_f16(b.0, b.1, b.2, a as _)
+    _vst4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
+pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v8f16.p0"
+            link_name = "llvm.aarch64.neon.st4lane.v8f16.p0"
         )]
-        fn _vst3q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, ptr: *mut i8);
+        fn _vst4q_lane_f16(
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            d: float16x8_t,
+            n: i64,
+            ptr: *mut i8,
+        );
     }
-    _vst3q_f16(b.0, b.1, b.2, a as _)
+    _vst4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst3))]
-pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
-    crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b)
+pub unsafe fn vst4_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x4_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2f32")]
+        fn _vst4_lane_f32(
+            ptr: *mut i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            n: i32,
+            size: i32,
+        );
+    }
+    _vst4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst3))]
-pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
-    crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst3))]
-pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
-    crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst3))]
-pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
-    crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst3))]
-pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
-    crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst3))]
-pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
-    crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst3))]
-pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
-    crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst3))]
-pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
-    crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
-    crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
-    crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
-    crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
-    crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
-    crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
-    crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
-    crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
-    crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f16")]
-        fn _vst3_lane_f16(
-            ptr: *mut i8,
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            n: i32,
-            size: i32,
-        );
-    }
-    _vst3_lane_f16(a as _, b.0, b.1, b.2, LANE, 4)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8f16")]
-        fn _vst3q_lane_f16(
-            ptr: *mut i8,
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            n: i32,
-            size: i32,
-        );
-    }
-    _vst3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 4)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v4f16.p0"
-        )]
-        fn _vst3_lane_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, n: i64, ptr: *mut i8);
-    }
-    _vst3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v8f16.p0"
-        )]
-        fn _vst3q_lane_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, n: i64, ptr: *mut i8);
-    }
-    _vst3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2f32")]
-        fn _vst3_lane_f32(
-            ptr: *mut i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            n: i32,
-            size: i32,
-        );
-    }
-    _vst3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x3_t) {
+pub unsafe fn vst4q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x4_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4f32")]
-        fn _vst3q_lane_f32(
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f32")]
+        fn _vst4q_lane_f32(
             ptr: *mut i8,
             a: float32x4_t,
             b: float32x4_t,
             c: float32x4_t,
+            d: float32x4_t,
             n: i32,
             size: i32,
         );
     }
-    _vst3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vst4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
+#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x3_t) {
+pub unsafe fn vst4_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x4_t) {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i8")]
-        fn _vst3_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i8")]
+        fn _vst4_lane_s8(
+            ptr: *mut i8,
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            d: int8x8_t,
+            n: i32,
+            size: i32,
+        );
     }
-    _vst3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1)
+    _vst4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
+#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) {
+pub unsafe fn vst4_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x4_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i16")]
-        fn _vst3_lane_s16(
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i16")]
+        fn _vst4_lane_s16(
             ptr: *mut i8,
             a: int16x4_t,
             b: int16x4_t,
             c: int16x4_t,
+            d: int16x4_t,
             n: i32,
             size: i32,
         );
     }
-    _vst3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+    _vst4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
+#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) {
+pub unsafe fn vst4q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x4_t) {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v8i16")]
-        fn _vst3q_lane_s16(
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i16")]
+        fn _vst4q_lane_s16(
             ptr: *mut i8,
             a: int16x8_t,
             b: int16x8_t,
             c: int16x8_t,
+            d: int16x8_t,
             n: i32,
             size: i32,
         );
     }
-    _vst3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+    _vst4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
+#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) {
+pub unsafe fn vst4_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x4_t) {
     static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v2i32")]
-        fn _vst3_lane_s32(
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2i32")]
+        fn _vst4_lane_s32(
             ptr: *mut i8,
             a: int32x2_t,
             b: int32x2_t,
             c: int32x2_t,
+            d: int32x2_t,
             n: i32,
             size: i32,
         );
     }
-    _vst3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vst4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
+#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) {
+pub unsafe fn vst4q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x4_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0.v4i32")]
-        fn _vst3q_lane_s32(
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i32")]
+        fn _vst4q_lane_s32(
             ptr: *mut i8,
             a: int32x4_t,
             b: int32x4_t,
             c: int32x4_t,
+            d: int32x4_t,
             n: i32,
             size: i32,
         );
     }
-    _vst3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vst4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) {
+pub unsafe fn vst4_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x4_t) {
     static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v2f32.p0"
+            link_name = "llvm.aarch64.neon.st4lane.v2f32.p0"
         )]
-        fn _vst3_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *mut i8);
+        fn _vst4_lane_f32(
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            n: i64,
+            ptr: *mut i8,
+        );
     }
-    _vst3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vst4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x3_t) {
+pub unsafe fn vst4q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x4_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v4f32.p0"
+            link_name = "llvm.aarch64.neon.st4lane.v4f32.p0"
         )]
-        fn _vst3q_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *mut i8);
+        fn _vst4q_lane_f32(
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            n: i64,
+            ptr: *mut i8,
+        );
     }
-    _vst3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vst4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x3_t) {
+pub unsafe fn vst4_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x4_t) {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v8i8.p0"
+            link_name = "llvm.aarch64.neon.st4lane.v8i8.p0"
         )]
-        fn _vst3_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *mut i8);
+        fn _vst4_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8);
     }
-    _vst3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
+    _vst4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) {
+pub unsafe fn vst4_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x4_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v4i16.p0"
+            link_name = "llvm.aarch64.neon.st4lane.v4i16.p0"
         )]
-        fn _vst3_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *mut i8);
+        fn _vst4_lane_s16(
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            d: int16x4_t,
+            n: i64,
+            ptr: *mut i8,
+        );
     }
-    _vst3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
+    _vst4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x4_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4lane.v8i16.p0"
+        )]
+        fn _vst4q_lane_s16(
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            d: int16x8_t,
+            n: i64,
+            ptr: *mut i8,
+        );
+    }
+    _vst4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x4_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4lane.v2i32.p0"
+        )]
+        fn _vst4_lane_s32(
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            d: int32x2_t,
+            n: i64,
+            ptr: *mut i8,
+        );
+    }
+    _vst4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst4q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4lane.v4i32.p0"
+        )]
+        fn _vst4q_lane_s32(
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            d: int32x4_t,
+            n: i64,
+            ptr: *mut i8,
+        );
+    }
+    _vst4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x4_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst4_lane_s8::<LANE>(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst4_lane_s16::<LANE>(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x4_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst4q_lane_s16::<LANE>(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x4_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vst4_lane_s32::<LANE>(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst4q_lane_s32::<LANE>(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x4_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst4_lane_s8::<LANE>(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst4_lane_s16::<LANE>(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x4_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst4q_lane_s16::<LANE>(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) {
+    vst4_s64(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
+    core::ptr::write_unaligned(a.cast(), b)
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
+    core::ptr::write_unaligned(a.cast(), b)
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) {
+    vst4_s64(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) {
+    vst4_s8(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) {
+    vst4q_s8(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) {
+    vst4_s16(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) {
+    vst4q_s16(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) {
+    vst4_s32(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t) {
+    vst4q_s32(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) {
+    vst4_s8(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) {
+    vst4q_s8(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) {
+    vst4_s16(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) {
+    vst4q_s16(transmute(a), transmute(b))
+}
+#[doc = "Store SIMD&FP register (immediate offset)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstrq_p128)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vstrq_p128(a: *mut p128, b: p128) {
+    *a = b
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f16)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fsub)
+)]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f16)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fsub)
+)]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_sub(a, b) }
+}
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(subhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    let d = vsubhn_s16(b, c);
+    vcombine_s8(a, d)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v8i16.p0"
-        )]
-        fn _vst3q_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *mut i8);
-    }
-    _vst3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(subhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    let d = vsubhn_s32(b, c);
+    vcombine_s16(a, d)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v2i32.p0"
-        )]
-        fn _vst3_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *mut i8);
-    }
-    _vst3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(subhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    let d = vsubhn_s64(b, c);
+    vcombine_s32(a, d)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v4i32.p0"
-        )]
-        fn _vst3q_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *mut i8);
-    }
-    _vst3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(subhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+    let d = vsubhn_u16(b, c);
+    vcombine_u8(a, d)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3, LANE = 0)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(subhn2)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -63606,23 +65292,24 @@ pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst3_lane_s8::<LANE>(transmute(a), transmute(b))
+pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+    let d = vsubhn_u32(b, c);
+    vcombine_u16(a, d)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3, LANE = 0)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(subhn2)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -63631,23 +65318,20 @@ pub unsafe fn vst3_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst3_lane_s16::<LANE>(transmute(a), transmute(b))
+pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+    let d = vsubhn_u64(b, c);
+    vcombine_u32(a, d)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3, LANE = 0)
+    assert_instr(subhn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -63656,23 +65340,20 @@ pub unsafe fn vst3_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst3q_lane_s16::<LANE>(transmute(a), transmute(b))
+pub fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
+    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3, LANE = 0)
+    assert_instr(subhn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -63681,23 +65362,20 @@ pub unsafe fn vst3q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    vst3_lane_s32::<LANE>(transmute(a), transmute(b))
+pub fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    let c: i32x4 = i32x4::new(16, 16, 16, 16);
+    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3, LANE = 0)
+    assert_instr(subhn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -63706,23 +65384,20 @@ pub unsafe fn vst3_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst3q_lane_s32::<LANE>(transmute(a), transmute(b))
+pub fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    let c: i64x2 = i64x2::new(32, 32);
+    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3, LANE = 0)
+    assert_instr(subhn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -63731,23 +65406,20 @@ pub unsafe fn vst3q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst3_lane_s8::<LANE>(transmute(a), transmute(b))
+pub fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
+    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3, LANE = 0)
+    assert_instr(subhn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -63756,23 +65428,20 @@ pub unsafe fn vst3_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst3_lane_s16::<LANE>(transmute(a), transmute(b))
+pub fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    let c: u32x4 = u32x4::new(16, 16, 16, 16);
+    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3, LANE = 0)
+    assert_instr(subhn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -63781,21 +65450,19 @@ pub unsafe fn vst3_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst3q_lane_s16::<LANE>(transmute(a), transmute(b))
+pub fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    let c: u64x2 = u64x2::new(32, 32);
+    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s8)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ssubl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -63805,44 +65472,47 @@ pub unsafe fn vst3q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) {
-    vst3_s64(transmute(a), transmute(b))
+pub fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
+    unsafe {
+        let c: int16x8_t = simd_cast(a);
+        let d: int16x8_t = simd_cast(b);
+        simd_sub(c, d)
+    }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
-    core::ptr::write_unaligned(a.cast(), b)
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
-    core::ptr::write_unaligned(a.cast(), b)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ssubl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    unsafe {
+        let c: int32x4_t = simd_cast(a);
+        let d: int32x4_t = simd_cast(b);
+        simd_sub(c, d)
+    }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ssubl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -63852,20 +65522,22 @@ pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) {
-    vst3_s64(transmute(a), transmute(b))
+pub fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    unsafe {
+        let c: int64x2_t = simd_cast(a);
+        let d: int64x2_t = simd_cast(b);
+        simd_sub(c, d)
+    }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3)
+    assert_instr(usubl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -63875,20 +65547,22 @@ pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) {
-    vst3_s8(transmute(a), transmute(b))
+pub fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
+    unsafe {
+        let c: uint16x8_t = simd_cast(a);
+        let d: uint16x8_t = simd_cast(b);
+        simd_sub(c, d)
+    }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3)
+    assert_instr(usubl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -63898,20 +65572,22 @@ pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) {
-    vst3q_s8(transmute(a), transmute(b))
+pub fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
+    unsafe {
+        let c: uint32x4_t = simd_cast(a);
+        let d: uint32x4_t = simd_cast(b);
+        simd_sub(c, d)
+    }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3)
+    assert_instr(usubl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -63921,20 +65597,22 @@ pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) {
-    vst3_s16(transmute(a), transmute(b))
+pub fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
+    unsafe {
+        let c: uint64x2_t = simd_cast(a);
+        let d: uint64x2_t = simd_cast(b);
+        simd_sub(c, d)
+    }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3)
+    assert_instr(ssubw)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -63944,20 +65622,18 @@ pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) {
-    vst3q_s16(transmute(a), transmute(b))
+pub fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t {
+    unsafe { simd_sub(a, simd_cast(b)) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3)
+    assert_instr(ssubw)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -63967,20 +65643,18 @@ pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) {
-    vst3_s32(transmute(a), transmute(b))
+pub fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t {
+    unsafe { simd_sub(a, simd_cast(b)) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3)
+    assert_instr(ssubw)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -63990,20 +65664,18 @@ pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) {
-    vst3q_s32(transmute(a), transmute(b))
+pub fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t {
+    unsafe { simd_sub(a, simd_cast(b)) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Unsigned Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3)
+    assert_instr(usubw)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -64013,20 +65685,18 @@ pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) {
-    vst3_s8(transmute(a), transmute(b))
+pub fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t {
+    unsafe { simd_sub(a, simd_cast(b)) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Unsigned Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3)
+    assert_instr(usubw)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -64036,1261 +65706,771 @@ pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) {
-    vst3q_s8(transmute(a), transmute(b))
+pub fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t {
+    unsafe { simd_sub(a, simd_cast(b)) }
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Unsigned Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usubw)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
+    unsafe { simd_sub(a, simd_cast(b)) }
+}
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"]
+#[inline]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sudot, LANE = 0)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) {
-    vst3_s16(transmute(a), transmute(b))
+pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c = vreinterpret_u32_u8(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    vusdot_s32(a, vreinterpret_u8_u32(c), b)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st3)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sudot, LANE = 0)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) {
-    vst3q_s16(transmute(a), transmute(b))
+pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c = vreinterpret_u32_u8(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(vst4))]
-pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f16")]
-        fn _vst4_f16(
-            ptr: *mut i8,
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            d: float16x4_t,
-            size: i32,
-        );
-    }
-    _vst4_f16(a as _, b.0, b.1, b.2, b.3, 2)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sudot, LANE = 3)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub fn vsudot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_u32_u8(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    vusdot_s32(a, vreinterpret_u8_u32(c), b)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(vst4))]
-pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8f16")]
-        fn _vst4q_f16(
-            ptr: *mut i8,
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            d: float16x8_t,
-            size: i32,
-        );
-    }
-    _vst4q_f16(a as _, b.0, b.1, b.2, b.3, 2)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sudot, LANE = 3)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub fn vsudotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_u32_u8(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) {
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v4f16.p0"
-        )]
-        fn _vst4_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t, d: float16x4_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl1")]
+        fn _vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    _vst4_f16(b.0, b.1, b.2, b.3, a as _)
+    unsafe { _vtbl1(a, b) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v8f16.p0"
-        )]
-        fn _vst4q_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t, d: float16x8_t, ptr: *mut i8);
-    }
-    _vst4q_f16(b.0, b.1, b.2, b.3, a as _)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst4))]
-pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2f32")]
-        fn _vst4_f32(
-            ptr: *mut i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-            size: i32,
-        );
-    }
-    _vst4_f32(a as _, b.0, b.1, b.2, b.3, 4)
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    vtbl1(a, b)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst4))]
-pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4f32")]
-        fn _vst4q_f32(
-            ptr: *mut i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-            size: i32,
-        );
-    }
-    _vst4q_f32(a as _, b.0, b.1, b.2, b.3, 4)
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { transmute(vtbl1(transmute(a), transmute(b))) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst4))]
-pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i8")]
-        fn _vst4_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, size: i32);
-    }
-    _vst4_s8(a as _, b.0, b.1, b.2, b.3, 1)
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
+    unsafe { transmute(vtbl1(transmute(a), transmute(b))) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst4))]
-pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
+#[cfg_attr(test, assert_instr(vtbl))]
+fn vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v16i8")]
-        fn _vst4q_s8(
-            ptr: *mut i8,
-            a: int8x16_t,
-            b: int8x16_t,
-            c: int8x16_t,
-            d: int8x16_t,
-            size: i32,
-        );
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl2")]
+        fn _vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
     }
-    _vst4q_s8(a as _, b.0, b.1, b.2, b.3, 1)
+    unsafe { _vtbl2(a, b, c) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst4))]
-pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i16")]
-        fn _vst4_s16(
-            ptr: *mut i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            d: int16x4_t,
-            size: i32,
-        );
-    }
-    _vst4_s16(a as _, b.0, b.1, b.2, b.3, 2)
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
+    vtbl2(a.0, a.1, b)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst4))]
-pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v8i16")]
-        fn _vst4q_s16(
-            ptr: *mut i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            d: int16x8_t,
-            size: i32,
-        );
-    }
-    _vst4q_s16(a as _, b.0, b.1, b.2, b.3, 2)
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst4))]
-pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v2i32")]
-        fn _vst4_s32(
-            ptr: *mut i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            d: int32x2_t,
-            size: i32,
-        );
-    }
-    _vst4_s32(a as _, b.0, b.1, b.2, b.3, 4)
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
+    unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vst4))]
-pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
+#[cfg_attr(test, assert_instr(vtbl))]
+fn vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v4i32")]
-        fn _vst4q_s32(
-            ptr: *mut i8,
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            d: int32x4_t,
-            size: i32,
-        );
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl3")]
+        fn _vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t;
     }
-    _vst4q_s32(a as _, b.0, b.1, b.2, b.3, 4)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
-    crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
-    crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
-    crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
-    crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
-    crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
-    crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
-    crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
-    crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b)
+    unsafe { _vtbl3(a, b, c, d) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f16")]
-        fn _vst4_lane_f16(
-            ptr: *mut i8,
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            d: float16x4_t,
-            n: i32,
-            size: i32,
-        );
-    }
-    _vst4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
+    vtbl3(a.0, a.1, a.2, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8f16")]
-        fn _vst4q_lane_f16(
-            ptr: *mut i8,
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            d: float16x8_t,
-            n: i32,
-            size: i32,
-        );
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        transmute(vtbl3(
+            transmute(a.0),
+            transmute(a.1),
+            transmute(a.2),
+            transmute(b),
+        ))
     }
-    _vst4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v4f16.p0"
-        )]
-        fn _vst4_lane_f16(
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            d: float16x4_t,
-            n: i64,
-            ptr: *mut i8,
-        );
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
+    unsafe {
+        transmute(vtbl3(
+            transmute(a.0),
+            transmute(a.1),
+            transmute(a.2),
+            transmute(b),
+        ))
     }
-    _vst4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+fn vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v8f16.p0"
-        )]
-        fn _vst4q_lane_f16(
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            d: float16x8_t,
-            n: i64,
-            ptr: *mut i8,
-        );
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl4")]
+        fn _vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t;
     }
-    _vst4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    unsafe { _vtbl4(a, b, c, d, e) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x4_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2f32")]
-        fn _vst4_lane_f32(
-            ptr: *mut i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-            n: i32,
-            size: i32,
-        );
-    }
-    _vst4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
+    vtbl4(a.0, a.1, a.2, a.3, b)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4f32")]
-        fn _vst4q_lane_f32(
-            ptr: *mut i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-            n: i32,
-            size: i32,
-        );
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        transmute(vtbl4(
+            transmute(a.0),
+            transmute(a.1),
+            transmute(a.2),
+            transmute(a.3),
+            transmute(b),
+        ))
     }
-    _vst4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x4_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i8")]
-        fn _vst4_lane_s8(
-            ptr: *mut i8,
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            d: int8x8_t,
-            n: i32,
-            size: i32,
-        );
+#[cfg_attr(test, assert_instr(vtbl))]
+pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
+    unsafe {
+        transmute(vtbl4(
+            transmute(a.0),
+            transmute(a.1),
+            transmute(a.2),
+            transmute(a.3),
+            transmute(b),
+        ))
     }
-    _vst4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1)"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(vtbx))]
+fn vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i16")]
-        fn _vst4_lane_s16(
-            ptr: *mut i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            d: int16x4_t,
-            n: i32,
-            size: i32,
-        );
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx1")]
+        fn _vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
     }
-    _vst4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
+    unsafe { _vtbx1(a, b, c) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"]
 #[inline]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    vtbx1(a, b, c)
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
+#[inline]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x4_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v8i16")]
-        fn _vst4q_lane_s16(
-            ptr: *mut i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            d: int16x8_t,
-            n: i32,
-            size: i32,
-        );
-    }
-    _vst4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
 #[inline]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
+    unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) }
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2)"]
+#[inline]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x4_t) {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(vtbx))]
+fn vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v2i32")]
-        fn _vst4_lane_s32(
-            ptr: *mut i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            d: int32x2_t,
-            n: i32,
-            size: i32,
-        );
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx2")]
+        fn _vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t;
     }
-    _vst4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+    unsafe { _vtbx2(a, b, c, d) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0.v4i32")]
-        fn _vst4q_lane_s32(
-            ptr: *mut i8,
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            d: int32x4_t,
-            n: i32,
-            size: i32,
-        );
-    }
-    _vst4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
+    vtbx2(a, b.0, b.1, c)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x4_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v2f32.p0"
-        )]
-        fn _vst4_lane_f32(
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-            n: i64,
-            ptr: *mut i8,
-        );
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        transmute(vtbx2(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(c),
+        ))
     }
-    _vst4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v4f32.p0"
-        )]
-        fn _vst4q_lane_f32(
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-            n: i64,
-            ptr: *mut i8,
-        );
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
+    unsafe {
+        transmute(vtbx2(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(c),
+        ))
     }
-    _vst4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x4_t) {
-    static_assert_uimm_bits!(LANE, 3);
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+fn vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v8i8.p0"
-        )]
-        fn _vst4_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx3")]
+        fn _vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t;
     }
-    _vst4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    unsafe { _vtbx3(a, b, c, d, e) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v4i16.p0"
-        )]
-        fn _vst4_lane_s16(
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            d: int16x4_t,
-            n: i64,
-            ptr: *mut i8,
-        );
-    }
-    _vst4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
+    vtbx3(a, b.0, b.1, b.2, c)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x4_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v8i16.p0"
-        )]
-        fn _vst4q_lane_s16(
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            d: int16x8_t,
-            n: i64,
-            ptr: *mut i8,
-        );
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        transmute(vtbx3(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            transmute(c),
+        ))
     }
-    _vst4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x4_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v2i32.p0"
-        )]
-        fn _vst4_lane_s32(
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            d: int32x2_t,
-            n: i64,
-            ptr: *mut i8,
-        );
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
+    unsafe {
+        transmute(vtbx3(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            transmute(c),
+        ))
     }
-    _vst4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+fn vtbx4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t, f: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v4i32.p0"
-        )]
-        fn _vst4q_lane_s32(
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            d: int32x4_t,
-            n: i64,
-            ptr: *mut i8,
-        );
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx4")]
+        fn _vtbx4(
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            d: int8x8_t,
+            e: int8x8_t,
+            f: int8x8_t,
+        ) -> int8x8_t;
     }
-    _vst4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    unsafe { _vtbx4(a, b, c, d, e, f) }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst4_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x4_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst4_lane_s8::<LANE>(transmute(a), transmute(b))
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
+    unsafe {
+        vtbx4(
+            a,
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            transmute(b.3),
+            c,
+        )
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst4_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst4_lane_s16::<LANE>(transmute(a), transmute(b))
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
+    let mut b: int8x8x4_t = b;
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x8_t = vtbx4(
+            a,
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            transmute(b.3),
+            c,
+        );
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst4q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x4_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst4q_lane_s16::<LANE>(transmute(a), transmute(b))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        transmute(vtbx4(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            transmute(b.3),
+            transmute(c),
+        ))
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vst4_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x4_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    vst4_lane_s32::<LANE>(transmute(a), transmute(b))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
+    unsafe {
+        transmute(vtbx4(
+            transmute(a),
+            transmute(b.0),
+            transmute(b.1),
+            transmute(b.2),
+            transmute(b.3),
+            transmute(c),
+        ))
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(trn1)
 )]
-pub unsafe fn vst4q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst4q_lane_s32::<LANE>(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4, LANE = 0)
+    assert_instr(trn2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x4_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst4_lane_s8::<LANE>(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
+    unsafe {
+        let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(trn1)
 )]
-pub unsafe fn vst4_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x4_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst4_lane_s16::<LANE>(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4, LANE = 0)
+    assert_instr(trn2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x4_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst4q_lane_s16::<LANE>(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        let mut ret_val: float16x4x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg(target_endian = "little")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(trn1)
 )]
-pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) {
-    vst4_s64(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
-    core::ptr::write_unaligned(a.cast(), b)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
-    core::ptr::write_unaligned(a.cast(), b)
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(trn2)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) {
-    vst4_s64(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
+    unsafe {
+        let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(trn1)
 )]
-pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) {
-    vst4_s8(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4)
+    assert_instr(trn2)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) {
-    vst4q_s8(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        let mut ret_val: float16x8x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(zip1)
 )]
-pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) {
-    vst4_s16(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65300,20 +66480,27 @@ pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) {
-    vst4q_s16(transmute(a), transmute(b))
+pub fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
+    unsafe {
+        let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4)
+    assert_instr(zip1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65323,20 +66510,32 @@ pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) {
-    vst4_s32(transmute(a), transmute(b))
+pub fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
+    unsafe {
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let mut ret_val: float32x2x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+        ret_val
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4)
+    assert_instr(zip1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65346,20 +66545,27 @@ pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t) {
-    vst4q_s32(transmute(a), transmute(b))
+pub fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
+    unsafe {
+        let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4)
+    assert_instr(zip1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65369,20 +66575,32 @@ pub unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) {
-    vst4_s8(transmute(a), transmute(b))
+pub fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let mut ret_val: int32x2x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+        ret_val
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4)
+    assert_instr(zip1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65392,20 +66610,27 @@ pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) {
-    vst4q_s8(transmute(a), transmute(b))
+pub fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
+    unsafe {
+        let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4)
+    assert_instr(zip1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65415,20 +66640,32 @@ pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) {
-    vst4_s16(transmute(a), transmute(b))
+pub fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let mut ret_val: uint32x2x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+        ret_val
+    }
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(st4)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65438,20 +66675,27 @@ pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) {
-    vst4q_s16(transmute(a), transmute(b))
+pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
+    unsafe {
+        let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Store SIMD&FP register (immediate offset)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstrq_p128)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65461,62 +66705,62 @@ pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vstrq_p128(a: *mut p128, b: p128) {
-    *a = b
+pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        let mut ret_val: float32x4x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f16)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fsub)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(trn1)
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_sub(a, b) }
-}
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fsub)
+    assert_instr(trn2)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
+    unsafe {
+        let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fsub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65526,18 +66770,32 @@ pub fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        let mut ret_val: int8x8x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f32)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fsub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65547,18 +66805,35 @@ pub fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
+    unsafe {
+        let a1: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        );
+        let b1: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        );
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s16)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65568,18 +66843,50 @@ pub fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
+    unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a1: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        );
+        let b1: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        );
+        let mut ret_val: int8x16x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(
+            ret_val.0,
+            ret_val.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val.1 = simd_shuffle!(
+            ret_val.1,
+            ret_val.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s16)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65589,18 +66896,27 @@ pub fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
+    unsafe {
+        let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u16)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65610,18 +66926,32 @@ pub fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        let mut ret_val: int16x4x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u16)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65631,18 +66961,27 @@ pub fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
+    unsafe {
+        let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s32)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65652,18 +66991,32 @@ pub fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        let mut ret_val: int16x8x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s32)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65673,18 +67026,27 @@ pub fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
+    unsafe {
+        let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u32)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65694,18 +67056,32 @@ pub fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        let mut ret_val: int32x4x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u32)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65715,18 +67091,27 @@ pub fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
+    unsafe {
+        let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s64)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65736,18 +67121,32 @@ pub fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        let mut ret_val: uint8x8x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s64)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65757,18 +67156,35 @@ pub fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
+    unsafe {
+        let a1: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        );
+        let b1: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        );
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u64)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65778,39 +67194,50 @@ pub fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a1: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        );
+        let b1: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        );
+        let mut ret_val: uint8x16x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(
+            ret_val.0,
+            ret_val.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val.1 = simd_shuffle!(
+            ret_val.1,
+            ret_val.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u64)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(trn1)
 )]
-pub fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_sub(a, b) }
-}
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65820,18 +67247,27 @@ pub fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
+    unsafe {
+        let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s8)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65841,18 +67277,32 @@ pub fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        let mut ret_val: uint16x4x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u8)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65862,18 +67312,27 @@ pub fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
+    unsafe {
+        let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u8)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sub)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65883,22 +67342,32 @@ pub fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_sub(a, b) }
+pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        let mut ret_val: uint16x8x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s16)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(subhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65908,23 +67377,27 @@ pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let d = vsubhn_s16(b, c);
-    vcombine_s8(a, d)
+pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
+    unsafe {
+        let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(subhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65934,23 +67407,32 @@ pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let d = vsubhn_s32(b, c);
-    vcombine_s16(a, d)
+pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
+    unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        let mut ret_val: uint32x4x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(subhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65960,23 +67442,27 @@ pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let d = vsubhn_s64(b, c);
-    vcombine_s32(a, d)
+pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
+    unsafe {
+        let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(subhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -65986,23 +67472,32 @@ pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    let d = vsubhn_u16(b, c);
-    vcombine_u8(a, d)
+pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        let mut ret_val: poly8x8x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(subhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66012,23 +67507,35 @@ pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    let d = vsubhn_u32(b, c);
-    vcombine_u16(a, d)
+pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
+    unsafe {
+        let a1: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        );
+        let b1: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        );
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(subhn2)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66038,19 +67545,50 @@ pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    let d = vsubhn_u64(b, c);
-    vcombine_u32(a, d)
+pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a1: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+        );
+        let b1: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+        );
+        let mut ret_val: poly8x16x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(
+            ret_val.0,
+            ret_val.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val.1 = simd_shuffle!(
+            ret_val.1,
+            ret_val.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(subhn)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66060,19 +67598,27 @@ pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
-    let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
-    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
+pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
+    unsafe {
+        let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s32)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(subhn)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66082,19 +67628,32 @@ pub fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
-    let c: i32x4 = i32x4::new(16, 16, 16, 16);
-    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
+pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
+        let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
+        let mut ret_val: poly16x4x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s64)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(subhn)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66104,19 +67663,27 @@ pub fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
-    let c: i64x2 = i64x2::new(32, 32);
-    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
+pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
+    unsafe {
+        let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        transmute((a1, b1))
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u16)"]
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(subhn)
+    assert_instr(trn1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(trn2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66126,19 +67693,27 @@ pub fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
-    let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
-    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
+pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
+        let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
+        let mut ret_val: poly16x8x2_t = transmute((a1, b1));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u32)"]
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(subhn)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66148,19 +67723,22 @@ pub fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
-    let c: u32x4 = u32x4::new(16, 16, 16, 16);
-    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
+pub fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t {
+    unsafe {
+        let c: int8x8_t = simd_and(a, b);
+        let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        simd_ne(c, transmute(d))
+    }
 }
-#[doc = "Subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u64)"]
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(subhn)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66170,19 +67748,22 @@ pub fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
-    let c: u64x2 = u64x2::new(32, 32);
-    unsafe { simd_cast(simd_shr(simd_sub(a, b), transmute(c))) }
+pub fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t {
+    unsafe {
+        let c: int8x16_t = simd_and(a, b);
+        let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        simd_ne(c, transmute(d))
+    }
 }
-#[doc = "Signed Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s8)"]
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssubl)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66192,22 +67773,22 @@ pub fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
+pub fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t {
     unsafe {
-        let c: int16x8_t = simd_cast(a);
-        let d: int16x8_t = simd_cast(b);
-        simd_sub(c, d)
+        let c: int16x4_t = simd_and(a, b);
+        let d: i16x4 = i16x4::new(0, 0, 0, 0);
+        simd_ne(c, transmute(d))
     }
 }
-#[doc = "Signed Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s16)"]
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssubl)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66217,22 +67798,22 @@ pub fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+pub fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t {
     unsafe {
-        let c: int32x4_t = simd_cast(a);
-        let d: int32x4_t = simd_cast(b);
-        simd_sub(c, d)
+        let c: int16x8_t = simd_and(a, b);
+        let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        simd_ne(c, transmute(d))
     }
 }
-#[doc = "Signed Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s32)"]
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssubl)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66242,22 +67823,22 @@ pub fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+pub fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t {
     unsafe {
-        let c: int64x2_t = simd_cast(a);
-        let d: int64x2_t = simd_cast(b);
-        simd_sub(c, d)
+        let c: int32x2_t = simd_and(a, b);
+        let d: i32x2 = i32x2::new(0, 0);
+        simd_ne(c, transmute(d))
     }
 }
-#[doc = "Unsigned Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u8)"]
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usubl)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66267,22 +67848,22 @@ pub fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
+pub fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t {
     unsafe {
-        let c: uint16x8_t = simd_cast(a);
-        let d: uint16x8_t = simd_cast(b);
-        simd_sub(c, d)
+        let c: int32x4_t = simd_and(a, b);
+        let d: i32x4 = i32x4::new(0, 0, 0, 0);
+        simd_ne(c, transmute(d))
     }
 }
-#[doc = "Unsigned Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u16)"]
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usubl)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66292,22 +67873,22 @@ pub fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
+pub fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t {
     unsafe {
-        let c: uint32x4_t = simd_cast(a);
-        let d: uint32x4_t = simd_cast(b);
-        simd_sub(c, d)
+        let c: poly8x8_t = simd_and(a, b);
+        let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        simd_ne(c, transmute(d))
     }
 }
-#[doc = "Unsigned Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u32)"]
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usubl)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66317,22 +67898,22 @@ pub fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
+pub fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t {
     unsafe {
-        let c: uint64x2_t = simd_cast(a);
-        let d: uint64x2_t = simd_cast(b);
-        simd_sub(c, d)
+        let c: poly8x16_t = simd_and(a, b);
+        let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        simd_ne(c, transmute(d))
     }
 }
-#[doc = "Signed Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s8)"]
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssubw)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66342,18 +67923,22 @@ pub fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t {
-    unsafe { simd_sub(a, simd_cast(b)) }
+pub fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t {
+    unsafe {
+        let c: poly16x4_t = simd_and(a, b);
+        let d: i16x4 = i16x4::new(0, 0, 0, 0);
+        simd_ne(c, transmute(d))
+    }
 }
-#[doc = "Signed Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s16)"]
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssubw)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66363,18 +67948,22 @@ pub fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t {
-    unsafe { simd_sub(a, simd_cast(b)) }
+pub fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t {
+    unsafe {
+        let c: poly16x8_t = simd_and(a, b);
+        let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        simd_ne(c, transmute(d))
+    }
 }
-#[doc = "Signed Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s32)"]
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ssubw)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66384,18 +67973,22 @@ pub fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t {
-    unsafe { simd_sub(a, simd_cast(b)) }
+pub fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let c: uint8x8_t = simd_and(a, b);
+        let d: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        simd_ne(c, transmute(d))
+    }
 }
-#[doc = "Unsigned Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u8)"]
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usubw)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66405,18 +67998,22 @@ pub fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t {
-    unsafe { simd_sub(a, simd_cast(b)) }
+pub fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let c: uint8x16_t = simd_and(a, b);
+        let d: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        simd_ne(c, transmute(d))
+    }
 }
-#[doc = "Unsigned Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u16)"]
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usubw)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66426,18 +68023,22 @@ pub fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t {
-    unsafe { simd_sub(a, simd_cast(b)) }
+pub fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let c: uint16x4_t = simd_and(a, b);
+        let d: u16x4 = u16x4::new(0, 0, 0, 0);
+        simd_ne(c, transmute(d))
+    }
 }
-#[doc = "Unsigned Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u32)"]
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usubw)
+    assert_instr(cmtst)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -66447,791 +68048,476 @@ pub fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
-    unsafe { simd_sub(a, simd_cast(b)) }
+pub fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let c: uint16x8_t = simd_and(a, b);
+        let d: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        simd_ne(c, transmute(d))
+    }
 }
-#[doc = "Dot product index form with signed and unsigned integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"]
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u32)"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(sudot, LANE = 0)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(cmtst)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c = vreinterpret_u32_u8(c);
-    let c = vdup_lane_u32::<LANE>(c);
-    vusdot_s32(a, vreinterpret_u8_u32(c), b)
+pub fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe {
+        let c: uint32x2_t = simd_and(a, b);
+        let d: u32x2 = u32x2::new(0, 0);
+        simd_ne(c, transmute(d))
+    }
 }
-#[doc = "Dot product index form with signed and unsigned integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u32)"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(sudot, LANE = 0)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(cmtst)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c = vreinterpret_u32_u8(c);
-    let c = vdupq_lane_u32::<LANE>(c);
-    vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
-}
-#[doc = "Dot product index form with signed and unsigned integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))]
-#[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(sudot, LANE = 3)
-)]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub fn vsudot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c = vreinterpretq_u32_u8(c);
-    let c = vdup_laneq_u32::<LANE>(c);
-    vusdot_s32(a, vreinterpret_u8_u32(c), b)
-}
-#[doc = "Dot product index form with signed and unsigned integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))]
-#[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(sudot, LANE = 3)
-)]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub fn vsudotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c = vreinterpretq_u32_u8(c);
-    let c = vdupq_laneq_u32::<LANE>(c);
-    vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl1")]
-        fn _vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vtbl1(a, b) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    vtbl1(a, b)
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { transmute(vtbl1(transmute(a), transmute(b))) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
-    unsafe { transmute(vtbl1(transmute(a), transmute(b))) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-fn vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl2")]
-        fn _vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vtbl2(a, b, c) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
-    vtbl2(a.0, a.1, b)
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
-    unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-fn vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl3")]
-        fn _vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vtbl3(a, b, c, d) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
-    vtbl3(a.0, a.1, a.2, b)
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        transmute(vtbl3(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(b),
-        ))
-    }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
-    unsafe {
-        transmute(vtbl3(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(b),
-        ))
-    }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-fn vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl4")]
-        fn _vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vtbl4(a, b, c, d, e) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
-    vtbl4(a.0, a.1, a.2, a.3, b)
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        transmute(vtbl4(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(a.3),
-            transmute(b),
-        ))
-    }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
-    unsafe {
-        transmute(vtbl4(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(a.3),
-            transmute(b),
-        ))
-    }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1)"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-fn vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx1")]
-        fn _vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vtbx1(a, b, c) }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    vtbx1(a, b, c)
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
-    unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2)"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-fn vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx2")]
-        fn _vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vtbx2(a, b, c, d) }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
-    vtbx2(a, b.0, b.1, c)
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        transmute(vtbx2(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(c),
-        ))
-    }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
+pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe {
-        transmute(vtbx2(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(c),
-        ))
-    }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3)"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-fn vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx3")]
-        fn _vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t;
+        let c: uint32x4_t = simd_and(a, b);
+        let d: u32x4 = u32x4::new(0, 0, 0, 0);
+        simd_ne(c, transmute(d))
     }
-    unsafe { _vtbx3(a, b, c, d, e) }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"]
+#[doc = "Dot product index form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
-    vtbx3(a, b.0, b.1, b.2, c)
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usdot, LANE = 0)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c = vreinterpret_s32_s8(c);
+    let c = vdup_lane_s32::<LANE>(c);
+    vusdot_s32(a, b, vreinterpret_s8_s32(c))
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
+#[doc = "Dot product index form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        transmute(vtbx3(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(c),
-        ))
-    }
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usdot, LANE = 0)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c = vreinterpret_s32_s8(c);
+    let c = vdupq_lane_s32::<LANE>(c);
+    vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
+#[doc = "Dot product index form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
-    unsafe {
-        transmute(vtbx3(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(c),
-        ))
-    }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usdot, LANE = 3)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub fn vusdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_s32_s8(c);
+    let c = vdup_laneq_s32::<LANE>(c);
+    vusdot_s32(a, b, vreinterpret_s8_s32(c))
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4)"]
+#[doc = "Dot product index form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-fn vtbx4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t, f: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx4")]
-        fn _vtbx4(
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            d: int8x8_t,
-            e: int8x8_t,
-            f: int8x8_t,
-        ) -> int8x8_t;
-    }
-    unsafe { _vtbx4(a, b, c, d, e, f) }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usdot, LANE = 3)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_s32_s8(c);
+    let c = vdupq_laneq_s32::<LANE>(c);
+    vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"]
+#[doc = "Dot product vector form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
-    unsafe {
-        vtbx4(
-            a,
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            c,
-        )
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(usdot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")]
+        fn _vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t;
     }
+    unsafe { _vusdot_s32(a, b, c) }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"]
+#[doc = "Dot product vector form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
-    let mut b: int8x8x4_t = b;
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
-        b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
-        let ret_val: int8x8_t = vtbx4(
-            a,
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            c,
-        );
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(usdot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")]
+        fn _vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t;
     }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
     unsafe {
-        transmute(vtbx4(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            transmute(c),
-        ))
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x2_t = _vusdot_s32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
+#[doc = "Dot product vector form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
-    unsafe {
-        transmute(vtbx4(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            transmute(c),
-        ))
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(usdot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")]
+        fn _vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t;
     }
+    unsafe { _vusdotq_s32(a, b, c) }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"]
+#[doc = "Dot product vector form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
-)]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(usdot)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
+pub fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")]
+        fn _vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t;
+    }
     unsafe {
-        let a1: float16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-        let b1: float16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-        transmute((a1, b1))
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: int8x16_t =
+            simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = _vusdotq_s32(a, b, c);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f16)"]
+#[doc = "Unsigned and signed 8-bit integer matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusmmlaq_s32)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
-)]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(usmmla)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
-    unsafe {
-        let a1: float16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-        let b1: float16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-        transmute((a1, b1))
+pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.usmmla.v4i32.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usmmla.v4i32.v16i8")]
+        fn _vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t;
     }
+    unsafe { _vusmmlaq_s32(a, b, c) }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(zip1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(zip2)
+    assert_instr(uzp2)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unsafe {
-        let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]);
-        let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]);
-        transmute((a1, b1))
+        let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(zip1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(zip2)
+    assert_instr(uzp2)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unsafe {
-        let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]);
-        let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]);
-        transmute((a1, b1))
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        let mut ret_val: float16x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(zip1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(zip2)
+    assert_instr(uzp2)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     unsafe {
-        let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
-        let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
-        transmute((a1, b1))
+        let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     unsafe {
-        let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-        let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-        transmute((a1, b1))
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let mut ret_val: float16x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67241,26 +68527,27 @@ pub fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
+pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
     unsafe {
-        let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-        let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-        transmute((a1, b1))
+        let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67270,34 +68557,32 @@ pub fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
+pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
     unsafe {
-        let a1: int8x16_t = simd_shuffle!(
-            a,
-            b,
-            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
-        );
-        let b1: int8x16_t = simd_shuffle!(
-            a,
-            b,
-            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
-        );
-        transmute((a1, b1))
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let mut ret_val: float32x2x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+        ret_val
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67307,26 +68592,27 @@ pub fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
+pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
     unsafe {
-        let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-        let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-        transmute((a1, b1))
+        let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67336,26 +68622,32 @@ pub fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
+pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
     unsafe {
-        let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-        let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-        transmute((a1, b1))
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let mut ret_val: int32x2x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+        ret_val
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67365,26 +68657,27 @@ pub fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
+pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
     unsafe {
-        let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-        let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-        transmute((a1, b1))
+        let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67394,26 +68687,32 @@ pub fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
+pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
     unsafe {
-        let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-        let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-        transmute((a1, b1))
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let mut ret_val: uint32x2x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+        ret_val
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67423,34 +68722,27 @@ pub fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
+pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
     unsafe {
-        let a1: uint8x16_t = simd_shuffle!(
-            a,
-            b,
-            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
-        );
-        let b1: uint8x16_t = simd_shuffle!(
-            a,
-            b,
-            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
-        );
-        transmute((a1, b1))
+        let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67460,26 +68752,32 @@ pub fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
+pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
     unsafe {
-        let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-        let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-        transmute((a1, b1))
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        let mut ret_val: float32x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67489,26 +68787,27 @@ pub fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
+pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
     unsafe {
-        let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-        let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-        transmute((a1, b1))
+        let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67518,26 +68817,32 @@ pub fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
+pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
     unsafe {
-        let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-        let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-        transmute((a1, b1))
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let mut ret_val: int8x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67547,26 +68852,35 @@ pub fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
+pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
     unsafe {
-        let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-        let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-        transmute((a1, b1))
+        let a0: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        );
+        let b0: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        );
+        transmute((a0, b0))
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67576,34 +68890,50 @@ pub fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
+pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
     unsafe {
-        let a1: poly8x16_t = simd_shuffle!(
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: int8x16_t = simd_shuffle!(
             a,
             b,
-            [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
         );
-        let b1: poly8x16_t = simd_shuffle!(
+        let b0: int8x16_t = simd_shuffle!(
             a,
             b,
-            [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
         );
-        transmute((a1, b1))
+        let mut ret_val: int8x16x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(
+            ret_val.0,
+            ret_val.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val.1 = simd_shuffle!(
+            ret_val.1,
+            ret_val.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67613,26 +68943,27 @@ pub fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
+pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
     unsafe {
-        let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-        let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-        transmute((a1, b1))
+        let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Transpose elements"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn1)
+    assert_instr(uzp1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(trn2)
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67642,22 +68973,32 @@ pub fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
+pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
     unsafe {
-        let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-        let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-        transmute((a1, b1))
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        let mut ret_val: int16x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67667,22 +69008,27 @@ pub fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t {
+pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
     unsafe {
-        let c: int8x8_t = simd_and(a, b);
-        let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67692,22 +69038,32 @@ pub fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t {
+pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
     unsafe {
-        let c: int8x16_t = simd_and(a, b);
-        let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let mut ret_val: int16x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67717,22 +69073,27 @@ pub fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t {
+pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
     unsafe {
-        let c: int16x4_t = simd_and(a, b);
-        let d: i16x4 = i16x4::new(0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67742,22 +69103,32 @@ pub fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t {
+pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
     unsafe {
-        let c: int16x8_t = simd_and(a, b);
-        let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        let mut ret_val: int32x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67767,22 +69138,27 @@ pub fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t {
+pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
     unsafe {
-        let c: int32x2_t = simd_and(a, b);
-        let d: i32x2 = i32x2::new(0, 0);
-        simd_ne(c, transmute(d))
+        let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67792,22 +69168,32 @@ pub fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t {
+pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
     unsafe {
-        let c: int32x4_t = simd_and(a, b);
-        let d: i32x4 = i32x4::new(0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let mut ret_val: uint8x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67817,22 +69203,35 @@ pub fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t {
+pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
     unsafe {
-        let c: poly8x8_t = simd_and(a, b);
-        let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a0: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        );
+        let b0: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        );
+        transmute((a0, b0))
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67842,22 +69241,50 @@ pub fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t {
+pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
     unsafe {
-        let c: poly8x16_t = simd_and(a, b);
-        let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        );
+        let b0: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        );
+        let mut ret_val: uint8x16x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(
+            ret_val.0,
+            ret_val.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val.1 = simd_shuffle!(
+            ret_val.1,
+            ret_val.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67867,22 +69294,27 @@ pub fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t {
+pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
     unsafe {
-        let c: poly16x4_t = simd_and(a, b);
-        let d: i16x4 = i16x4::new(0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Signed compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67892,22 +69324,32 @@ pub fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t {
+pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
     unsafe {
-        let c: poly16x8_t = simd_and(a, b);
-        let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        let mut ret_val: uint16x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Unsigned compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp1)
+)]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67917,22 +69359,27 @@ pub fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
     unsafe {
-        let c: uint8x8_t = simd_and(a, b);
-        let d: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Unsigned compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u8)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67942,22 +69389,32 @@ pub fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
     unsafe {
-        let c: uint8x16_t = simd_and(a, b);
-        let d: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let mut ret_val: uint16x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Unsigned compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67967,22 +69424,27 @@ pub fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
     unsafe {
-        let c: uint16x4_t = simd_and(a, b);
-        let d: u16x4 = u16x4::new(0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Unsigned compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u16)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -67992,22 +69454,32 @@ pub fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
     unsafe {
-        let c: uint16x8_t = simd_and(a, b);
-        let d: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        let mut ret_val: uint32x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Unsigned compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68017,22 +69489,27 @@ pub fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
     unsafe {
-        let c: uint32x2_t = simd_and(a, b);
-        let d: u32x2 = u32x2::new(0, 0);
-        simd_ne(c, transmute(d))
+        let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        transmute((a0, b0))
     }
 }
-#[doc = "Unsigned compare bitwise Test bits nonzero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cmtst)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68042,197 +69519,209 @@ pub fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
     unsafe {
-        let c: uint32x4_t = simd_and(a, b);
-        let d: u32x4 = u32x4::new(0, 0, 0, 0);
-        simd_ne(c, transmute(d))
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let mut ret_val: poly8x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Dot product index form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usdot, LANE = 0)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c = vreinterpret_s32_s8(c);
-    let c = vdup_lane_s32::<LANE>(c);
-    vusdot_s32(a, b, vreinterpret_s8_s32(c))
+pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
+    unsafe {
+        let a0: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        );
+        let b0: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        );
+        transmute((a0, b0))
+    }
 }
-#[doc = "Dot product index form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usdot, LANE = 0)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c = vreinterpret_s32_s8(c);
-    let c = vdupq_lane_s32::<LANE>(c);
-    vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
+pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+        );
+        let b0: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+        );
+        let mut ret_val: poly8x16x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(
+            ret_val.0,
+            ret_val.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val.1 = simd_shuffle!(
+            ret_val.1,
+            ret_val.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val
+    }
 }
-#[doc = "Dot product index form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usdot, LANE = 3)
+    assert_instr(uzp1)
 )]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub fn vusdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c = vreinterpretq_s32_s8(c);
-    let c = vdup_laneq_s32::<LANE>(c);
-    vusdot_s32(a, b, vreinterpret_s8_s32(c))
-}
-#[doc = "Dot product index form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usdot, LANE = 3)
-)]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c = vreinterpretq_s32_s8(c);
-    let c = vdupq_laneq_s32::<LANE>(c);
-    vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
-}
-#[doc = "Dot product vector form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"]
-#[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
-#[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(usdot)
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")]
-        fn _vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t;
+pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
+    unsafe {
+        let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        transmute((a0, b0))
     }
-    unsafe { _vusdot_s32(a, b, c) }
 }
-#[doc = "Dot product vector form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
-    all(
-        test,
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        target_endian = "little"
-    ),
-    assert_instr(usdot)
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")]
-        fn _vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t;
+pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
+        let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        let mut ret_val: poly16x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
-    unsafe { _vusdotq_s32(a, b, c) }
 }
-#[doc = "Unsigned and signed 8-bit integer matrix multiply-accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusmmlaq_s32)"]
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usmmla)
+    assert_instr(uzp1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uzp2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.usmmla.v4i32.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usmmla.v4i32.v16i8")]
-        fn _vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t;
+pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
+    unsafe {
+        let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        transmute((a0, b0))
     }
-    unsafe { _vusmmlaq_s32(a, b, c) }
 }
 #[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
 #[cfg_attr(
@@ -68243,35 +69732,39 @@ pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(uzp2)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
+pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
     unsafe {
-        let a0: float16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
-        let b0: float16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
-        transmute((a0, b0))
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let mut ret_val: poly16x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
@@ -68283,19 +69776,19 @@ pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
+pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unsafe {
-        let a0: float16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
-        let b0: float16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
         transmute((a0, b0))
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -68304,27 +69797,34 @@ pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip2)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unsafe {
-        let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]);
-        let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]);
-        transmute((a0, b0))
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        let mut ret_val: float16x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "little")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -68333,27 +69833,29 @@ pub fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip2)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     unsafe {
-        let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]);
-        let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
         transmute((a0, b0))
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
+#[cfg(target_endian = "big")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -68362,34 +69864,42 @@ pub fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip2)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     unsafe {
-        let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
-        let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
-        transmute((a0, b0))
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: float16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let mut ret_val: float16x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68399,26 +69909,27 @@ pub fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
+pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
     unsafe {
-        let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
-        let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]);
         transmute((a0, b0))
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68428,26 +69939,32 @@ pub fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
+pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
     unsafe {
-        let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
-        let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
-        transmute((a0, b0))
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: float32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let mut ret_val: float32x2x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+        ret_val
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68457,34 +69974,27 @@ pub fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
+pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
     unsafe {
-        let a0: int8x16_t = simd_shuffle!(
-            a,
-            b,
-            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
-        );
-        let b0: int8x16_t = simd_shuffle!(
-            a,
-            b,
-            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
-        );
+        let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]);
         transmute((a0, b0))
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68494,26 +70004,32 @@ pub fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
+pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
     unsafe {
-        let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
-        let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
-        transmute((a0, b0))
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: int32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let mut ret_val: int32x2x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+        ret_val
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68523,26 +70039,27 @@ pub fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
+pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
     unsafe {
-        let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
-        let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
         transmute((a0, b0))
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68552,26 +70069,32 @@ pub fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
+pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
     unsafe {
-        let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
-        let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
-        transmute((a0, b0))
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+        let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
+        let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let mut ret_val: uint32x2x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+        ret_val
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68581,26 +70104,27 @@ pub fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
+pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
     unsafe {
-        let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
-        let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
         transmute((a0, b0))
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68610,34 +70134,32 @@ pub fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
+pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
     unsafe {
-        let a0: uint8x16_t = simd_shuffle!(
-            a,
-            b,
-            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
-        );
-        let b0: uint8x16_t = simd_shuffle!(
-            a,
-            b,
-            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
-        );
-        transmute((a0, b0))
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let mut ret_val: int8x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68647,26 +70169,27 @@ pub fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
+pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
     unsafe {
-        let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
-        let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
         transmute((a0, b0))
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68676,26 +70199,32 @@ pub fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
+pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
     unsafe {
-        let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
-        let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
-        transmute((a0, b0))
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        let mut ret_val: int16x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68705,26 +70234,27 @@ pub fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
+pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
     unsafe {
-        let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
-        let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
+        let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
         transmute((a0, b0))
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68734,26 +70264,32 @@ pub fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
+pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
     unsafe {
-        let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
-        let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
-        transmute((a0, b0))
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let mut ret_val: uint8x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68763,34 +70299,27 @@ pub fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
+pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
     unsafe {
-        let a0: poly8x16_t = simd_shuffle!(
-            a,
-            b,
-            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
-        );
-        let b0: poly8x16_t = simd_shuffle!(
-            a,
-            b,
-            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
-        );
+        let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
         transmute((a0, b0))
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68800,26 +70329,32 @@ pub fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
+pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
     unsafe {
-        let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]);
-        let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]);
-        transmute((a0, b0))
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        let mut ret_val: uint16x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
-#[doc = "Unzip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"]
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp1)
+    assert_instr(zip1)
 )]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uzp2)
+    assert_instr(zip2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -68829,18 +70364,20 @@ pub fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
+pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
     unsafe {
-        let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
-        let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
         transmute((a0, b0))
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -68849,28 +70386,33 @@ pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip2)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
+pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
     unsafe {
-        let a0: float16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-        let b0: float16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
-        transmute((a0, b0))
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let mut ret_val: poly8x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vzip.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -68879,29 +70421,28 @@ pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip2)
 )]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
+pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
     unsafe {
-        let a0: float16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
-        let b0: float16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
         transmute((a0, b0))
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -68918,19 +70459,25 @@ pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
+pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
     unsafe {
-        let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]);
-        let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]);
-        transmute((a0, b0))
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: poly16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        let mut ret_val: poly16x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -68947,19 +70494,20 @@ pub fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
+pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
     unsafe {
-        let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]);
-        let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+        let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
         transmute((a0, b0))
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -68976,19 +70524,25 @@ pub fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
+pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
     unsafe {
-        let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
-        let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
-        transmute((a0, b0))
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: float32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        let mut ret_val: float32x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -69005,19 +70559,28 @@ pub fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
+pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
     unsafe {
-        let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
-        let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let a0: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
+        );
+        let b0: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
+        );
         transmute((a0, b0))
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -69034,19 +70597,43 @@ pub fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
+pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
     unsafe {
-        let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-        let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
-        transmute((a0, b0))
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
+        );
+        let b0: int8x16_t = simd_shuffle!(
+            a,
+            b,
+            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
+        );
+        let mut ret_val: int8x16x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(
+            ret_val.0,
+            ret_val.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val.1 = simd_shuffle!(
+            ret_val.1,
+            ret_val.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -69063,19 +70650,20 @@ pub fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
+pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
     unsafe {
-        let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
-        let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
         transmute((a0, b0))
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -69092,19 +70680,25 @@ pub fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
+pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
     unsafe {
-        let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-        let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
-        transmute((a0, b0))
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: int16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let mut ret_val: int16x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -69121,19 +70715,20 @@ pub fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
+pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
     unsafe {
-        let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
-        let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
         transmute((a0, b0))
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(zip1)
@@ -69150,16 +70745,22 @@ pub fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
+pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
     unsafe {
-        let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-        let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
-        transmute((a0, b0))
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: int32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        let mut ret_val: int32x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
@@ -69179,16 +70780,25 @@ pub fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
+pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
     unsafe {
-        let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-        let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        let a0: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
+        );
+        let b0: uint8x16_t = simd_shuffle!(
+            a,
+            b,
+            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
+        );
         transmute((a0, b0))
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
@@ -69208,24 +70818,40 @@ pub fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
+pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
     unsafe {
-        let a0: int8x16_t = simd_shuffle!(
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: uint8x16_t = simd_shuffle!(
             a,
             b,
             [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
         );
-        let b0: int8x16_t = simd_shuffle!(
+        let b0: uint8x16_t = simd_shuffle!(
             a,
             b,
             [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
         );
-        transmute((a0, b0))
+        let mut ret_val: uint8x16x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(
+            ret_val.0,
+            ret_val.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val.1 = simd_shuffle!(
+            ret_val.1,
+            ret_val.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
@@ -69245,16 +70871,17 @@ pub fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
+pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
     unsafe {
-        let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
-        let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
         transmute((a0, b0))
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
@@ -69274,16 +70901,22 @@ pub fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
+pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
     unsafe {
-        let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-        let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
-        transmute((a0, b0))
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let mut ret_val: uint16x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
@@ -69303,24 +70936,17 @@ pub fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
+pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
     unsafe {
-        let a0: uint8x16_t = simd_shuffle!(
-            a,
-            b,
-            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
-        );
-        let b0: uint8x16_t = simd_shuffle!(
-            a,
-            b,
-            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
-        );
+        let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
         transmute((a0, b0))
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
@@ -69340,16 +70966,22 @@ pub fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
+pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
     unsafe {
-        let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
-        let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
-        transmute((a0, b0))
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+        let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
+        let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        let mut ret_val: uint32x4x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+        ret_val
     }
 }
 #[doc = "Zip vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
@@ -69369,16 +71001,25 @@ pub fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
+pub fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
     unsafe {
-        let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-        let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
+        let a0: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
+        );
+        let b0: poly8x16_t = simd_shuffle!(
+            a,
+            b,
+            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
+        );
         transmute((a0, b0))
     }
 }
 #[doc = "Zip vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
@@ -69400,6 +71041,10 @@ pub fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
 )]
 pub fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
     unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly8x16_t =
+            simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
         let a0: poly8x16_t = simd_shuffle!(
             a,
             b,
@@ -69410,12 +71055,24 @@ pub fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
             b,
             [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
         );
-        transmute((a0, b0))
+        let mut ret_val: poly8x16x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(
+            ret_val.0,
+            ret_val.0,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val.1 = simd_shuffle!(
+            ret_val.1,
+            ret_val.1,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        );
+        ret_val
     }
 }
 #[doc = "Zip vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
@@ -69442,3 +71099,38 @@ pub fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
         transmute((a0, b0))
     }
 }
+#[doc = "Zip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(zip1)
+)]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(zip2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let b: poly16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let a0: poly16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
+        let b0: poly16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
+        let mut ret_val: poly16x8x2_t = transmute((a0, b0));
+        ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        ret_val
+    }
+}
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 2f7f2fc2b0cba..dc83635a0ef3a 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -2274,6 +2274,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-stable
     static_defs: ['const N: i32']
+    big_endian_inverse: true
     safety: safe
     types:
       - poly64x2_t
@@ -2291,6 +2292,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-stable
     static_defs: ['const N: i32']
+    big_endian_inverse: true
     safety: safe
     types:
       - [poly64x1_t, poly64x2_t]
@@ -2482,6 +2484,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-stable
     static_defs: ['const N: i32']
+    big_endian_inverse: true
     safety: safe
     types:
       - poly64x2_t
@@ -5705,6 +5708,7 @@ intrinsics:
       - *neon-unstable-fcma
     assert_instr: [fcadd]
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
       - float32x4_t
@@ -5725,6 +5729,7 @@ intrinsics:
       - *neon-unstable-fcma
     assert_instr: [fcadd]
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
       - float32x4_t
@@ -5747,6 +5752,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fcadd]
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
       - float16x8_t
@@ -5768,6 +5774,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fcadd]
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
       - float16x8_t
@@ -5787,6 +5794,7 @@ intrinsics:
       - *neon-unstable-fcma
     assert_instr: [fcmla]
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
       - float32x4_t
@@ -5809,6 +5817,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
       - float16x8_t
@@ -5828,6 +5837,7 @@ intrinsics:
       - *neon-unstable-fcma
     assert_instr: [fcmla]
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
       - float32x4_t
@@ -5850,6 +5860,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
       - float16x8_t
@@ -5869,6 +5880,7 @@ intrinsics:
       - *neon-unstable-fcma
     assert_instr: [fcmla]
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
       - float32x4_t
@@ -5892,6 +5904,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
       - float16x8_t
@@ -6043,6 +6056,7 @@ intrinsics:
       - *neon-unstable-fcma
     assert_instr: [fcmla]
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
       - float32x4_t
@@ -6066,6 +6080,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
       - float16x8_t
@@ -6661,6 +6676,7 @@ intrinsics:
     attr: [*neon-stable]
     assert_instr: [faddp]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float32x4_t, "4"]
       - [float64x2_t, "2"]
@@ -6683,6 +6699,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [faddp]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x8_t, "8"]
     compose:
@@ -6704,6 +6721,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fmaxp]
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
       - float16x8_t
@@ -6725,6 +6743,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fmaxnmp]
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
       - float16x8_t
@@ -6746,6 +6765,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fminp]
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
       - float16x8_t
@@ -6767,6 +6787,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fminnmp]
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
       - float16x8_t
@@ -6806,6 +6827,7 @@ intrinsics:
     attr: [*neon-stable]
     assert_instr: [fminp]
     safety: safe
+    big_endian_inverse: true
     types:
       - ["s_f32", float32x2_t, f32]
       - ["qd_f64", float64x2_t, f64]
@@ -8455,7 +8477,6 @@ intrinsics:
     attr: [*neon-stable]
     assert_instr: [nop]
     safety: safe
-    big_endian_inverse: true
     types:
       - [float64x1_t, int8x8_t]
       - [float64x1_t, int16x4_t]
@@ -8512,7 +8533,6 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
-    big_endian_inverse: true
     types:
       - [float64x1_t, float16x4_t]
       - [float16x4_t, float64x1_t]
@@ -8779,6 +8799,7 @@ intrinsics:
       - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1']
     compose:
@@ -8841,6 +8862,7 @@ intrinsics:
       - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [poly64x2_t, poly64x1_t]
     compose:
@@ -8874,7 +8896,6 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
       - *neon-stable
     safety: safe
-    big_endian_inverse: true
     types:
       - ["u64", float64x1_t]
     compose:
@@ -8890,6 +8911,7 @@ intrinsics:
       - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - ["f64", float64x1_t, float64x1_t]
     compose:
@@ -8906,6 +8928,7 @@ intrinsics:
       - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - ["f64", float64x2_t, float64x2_t]
     compose:
@@ -9019,6 +9042,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw1]]}]]
       - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint32x4_t
     compose:
@@ -9037,6 +9061,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw2]]}]]
       - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint32x4_t
     compose:
@@ -9055,6 +9080,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3ss1]]}]]
       - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint32x4_t
     compose:
@@ -9073,6 +9099,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4ekey]]}]]
       - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint32x4_t
     compose:
@@ -9091,6 +9118,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4e]]}]]
       - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint32x4_t
     compose:
@@ -9127,6 +9155,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h]]}]]
       - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint64x2_t
     compose:
@@ -9145,6 +9174,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h2]]}]]
       - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint64x2_t
     compose:
@@ -9163,6 +9193,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su0]]}]]
       - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint64x2_t
     compose:
@@ -9181,6 +9212,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su1]]}]]
       - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint64x2_t
     compose:
@@ -9201,6 +9233,7 @@ intrinsics:
       - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
     static_defs: ["const IMM2: i32"]
     safety: safe
+    big_endian_inverse: true
     types:
       - ['1aq_u32', uint32x4_t, 'sm3tt1a', 'SM3TT1A']
       - ['1bq_u32', uint32x4_t, 'sm3tt1b', 'SM3TT1B']
@@ -9445,6 +9478,7 @@ intrinsics:
       - *neon-stable
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]']
       - [int8x16_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]']
@@ -9475,6 +9509,7 @@ intrinsics:
       - *target-not-arm64ec
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, '[0, 4, 2, 6]']
       - [float16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]']
@@ -9489,6 +9524,7 @@ intrinsics:
       - *neon-stable
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int32x2_t, '[0, 2]']
       - [int64x2_t, '[0, 2]']
@@ -9508,6 +9544,7 @@ intrinsics:
       - *neon-stable
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]']
       - [int8x16_t, '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]']
@@ -9537,6 +9574,7 @@ intrinsics:
       - *target-not-arm64ec
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, '[1, 5, 3, 7]']
       - [float16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]']
@@ -9551,6 +9589,7 @@ intrinsics:
       - *neon-stable
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int32x2_t, '[1, 3]']
       - [int64x2_t, '[1, 3]']
@@ -9570,6 +9609,7 @@ intrinsics:
       - *neon-stable
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]']
       - [int8x16_t, '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]']
@@ -9606,6 +9646,7 @@ intrinsics:
       - *target-not-arm64ec
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, '[2, 6, 3, 7]']
       - [float16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]']
@@ -9620,6 +9661,7 @@ intrinsics:
       - *neon-stable
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]']
       - [int8x16_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]']
@@ -9657,6 +9699,7 @@ intrinsics:
       - *target-not-arm64ec
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, '[0, 4, 1, 5]']
       - [float16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]']
@@ -9671,6 +9714,7 @@ intrinsics:
       - *neon-stable
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int32x2_t, '[0, 2]']
       - [int64x2_t, '[0, 2]']
@@ -9690,6 +9734,7 @@ intrinsics:
       - *neon-stable
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]']
       - [int8x16_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]']
@@ -9719,6 +9764,7 @@ intrinsics:
       - *target-not-arm64ec
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, '[0, 2, 4, 6]']
       - [float16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]']
@@ -9733,6 +9779,7 @@ intrinsics:
       - *neon-stable
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int32x2_t, '[1, 3]']
       - [int64x2_t, '[1, 3]']
@@ -9752,6 +9799,7 @@ intrinsics:
       - *neon-stable
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]']
       - [int8x16_t, '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]']
@@ -9785,6 +9833,7 @@ intrinsics:
       - *target-not-arm64ec
       - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, '[1, 3, 5, 7]']
       - [float16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]']
@@ -9942,6 +9991,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
       - float64x2_t
@@ -11326,6 +11376,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - ['s_f32', float32x2_t, "f32"]
       - ['qd_f64', float64x2_t, "f64"]
@@ -11344,6 +11395,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - ['s_f32', float32x2_t, "f32"]
       - ['qd_f64', float64x2_t, "f64"]
@@ -11575,6 +11627,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
       - float64x2_t
@@ -12089,6 +12142,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, int8x16_t, uint8x8_t, vqtbx1]
       - [int8x16_t, int8x16_t, uint8x16_t, vqtbx1q]
@@ -12108,6 +12162,7 @@ intrinsics:
       - [poly8x8_t, "poly8x16_t", uint8x8_t, "vqtbx1", "_p8"]
       - [uint8x16_t, "uint8x16_t", uint8x16_t, "vqtbx1q", "q_u8"]
       - [poly8x16_t, "poly8x16_t", uint8x16_t, "vqtbx1q", "q_p8"]
+    big_endian_inverse: true
     compose:
       - FnCall:
           - transmute
@@ -12284,6 +12339,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - ['int8x16_t', uint8x8_t, 'vqtbl1', 'int8x8_t']
       - ['int8x16_t', uint8x16_t, 'vqtbl1q', 'int8x16_t']
@@ -12303,6 +12359,7 @@ intrinsics:
       - ['poly8x16_t', uint8x8_t, 'vqtbl1', 'poly8x8_t']
       - ['uint8x16_t', uint8x16_t, 'vqtbl1q', 'uint8x16_t']
       - ['poly8x16_t', uint8x16_t, 'vqtbl1q', 'poly8x16_t']
+    big_endian_inverse: true
     compose:
       - FnCall:
           - transmute
@@ -12319,6 +12376,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - ['int8x16x2_t', uint8x8_t, 'vqtbl2', 'int8x8_t']
       - ['int8x16x2_t', uint8x16_t, 'vqtbl2q', 'int8x16_t']
@@ -12332,6 +12390,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
+    big_endian_inverse: true
     safety: safe
     types:
       - ['uint8x16x2_t', uint8x8_t, 'vqtbl2', 'uint8x8_t']
@@ -12355,6 +12414,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, 'int8x16x2_t', uint8x8_t, 'vqtbx2']
       - [int8x16_t, 'int8x16x2_t', uint8x16_t, 'vqtbx2q']
@@ -12368,6 +12428,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
+    big_endian_inverse: true
     safety: safe
     types:
       - [uint8x8_t, 'uint8x16x2_t', uint8x8_t, 'vqtbx2']
@@ -12391,6 +12452,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
+    big_endian_inverse: true
     safety: safe
     types:
       - ['int8x8_t', 'int8x16x3_t', uint8x8_t, 'vqtbl3']
@@ -12405,6 +12467,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
+    big_endian_inverse: true
     safety: safe
     types:
       - ['uint8x8_t', 'uint8x16x3_t', uint8x8_t, 'vqtbl3']
@@ -12429,6 +12492,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, 'int8x16x3_t', uint8x8_t, 'vqtbx3']
       - [int8x16_t, 'int8x16x3_t', uint8x16_t, 'vqtbx3q']
@@ -12442,6 +12506,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
+    big_endian_inverse: true
     safety: safe
     types:
       - [uint8x8_t, 'uint8x16x3_t', uint8x8_t, 'vqtbx3']
@@ -12466,6 +12531,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
+    big_endian_inverse: true
     safety: safe
     types:
       - ['int8x16x4_t', uint8x8_t, 'vqtbl4', 'int8x8_t']
@@ -12480,6 +12546,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
       - *neon-stable
+    big_endian_inverse: true
     safety: safe
     types:
       - ['uint8x16x4_t', uint8x8_t, 'vqtbl4', 'uint8x8_t']
@@ -12505,6 +12572,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, 'int8x16x4_t', uint8x8_t, 'vqtbx4']
       - [int8x16_t, 'int8x16x4_t', uint8x16_t, 'vqtbx4q']
@@ -12518,6 +12586,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
       - *neon-stable
+    big_endian_inverse: true
     safety: safe
     types:
       - [uint8x8_t, 'uint8x16x4_t', uint8x8_t, 'vqtbx4']
@@ -12964,6 +13033,7 @@ intrinsics:
       - *neon-stable
     assert_instr: [addp]
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x16_t, "16"]
       - [int16x8_t, "8"]
@@ -13252,6 +13322,7 @@ intrinsics:
       - *neon-stable
     assert_instr: ['sminp']
     safety: safe
+    big_endian_inverse: true
     types:
       - int8x16_t
       - int16x8_t
@@ -13271,6 +13342,7 @@ intrinsics:
       - *neon-stable
     assert_instr: ['uminp']
     safety: safe
+    big_endian_inverse: true
     types:
       - uint8x16_t
       - uint16x8_t
@@ -13290,6 +13362,7 @@ intrinsics:
       - *neon-stable
     assert_instr: ['fminp']
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x4_t
       - float64x2_t
@@ -13308,6 +13381,7 @@ intrinsics:
       - *neon-stable
     assert_instr: ['smaxp']
     safety: safe
+    big_endian_inverse: true
     types:
       - int8x16_t
       - int16x8_t
@@ -13327,6 +13401,7 @@ intrinsics:
       - *neon-stable
     assert_instr: ['umaxp']
     safety: safe
+    big_endian_inverse: true
     types:
       - uint8x16_t
       - uint16x8_t
@@ -13346,6 +13421,7 @@ intrinsics:
       - *neon-stable
     assert_instr: ['fmaxp']
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x4_t
       - float64x2_t
@@ -13500,6 +13576,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fmlal2]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float32x2_t, float16x4_t, '_high_']
       - [float32x4_t, float16x8_t, 'q_high_']
@@ -13548,6 +13625,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fmlal]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float32x2_t, float16x4_t, '_low_']
       - [float32x4_t, float16x8_t, 'q_low_']
@@ -13596,6 +13674,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fmlsl2]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float32x2_t, float16x4_t, '_high_']
       - [float32x4_t, float16x8_t, 'q_high_']
@@ -13643,6 +13722,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [fmlsl]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float32x2_t, float16x4_t, '_low_']
       - [float32x4_t, float16x8_t, 'q_low_']
@@ -14104,6 +14184,7 @@ intrinsics:
       - *neon-stable
     assert_instr: ['{type[3]}']
     safety: safe
+    big_endian_inverse: true
     types:
       - ['vget_high_f64', 'float64x2_t', 'float64x1_t', 'fmov', 'float64x1_t([simd_extract!(a, 1)])']
       - ['vget_low_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 0)])']
@@ -14118,6 +14199,7 @@ intrinsics:
       - *neon-stable
     assert_instr: [mov]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float64x1_t, float64x2_t, '[0, 1]']
     compose:
@@ -14132,6 +14214,7 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['1']]
     assert_instr: [['nop', 'IMM5 = 0']]
     safety: safe
+    big_endian_inverse: true
     static_defs: ["const IMM5: i32"]
     types:
       - ['float64x2_t', 'f64']
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 972d838f42dcf..e8682cf45feb8 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -1442,6 +1442,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32; 8]']
       - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32; 16]']
@@ -1466,6 +1467,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32; 16]']
       - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32; 8]']
@@ -1490,6 +1492,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32; 4]']
       - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32; 8]']
@@ -1514,6 +1517,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32; 8]']
       - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32; 4]']
@@ -1541,6 +1545,7 @@ intrinsics:
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32; 8]']
       - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32; 4]']
@@ -1581,6 +1586,7 @@ intrinsics:
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32; 4]']
       - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32; 8]']
@@ -1602,6 +1608,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [_lane_s32, int32x2_t, int32x2_t, '1', '[N as u32, N as u32]']
       - [q_lane_s32, int32x2_t, int32x4_t, '1', '[N as u32, N as u32, N as u32, N as u32]']
@@ -1626,6 +1633,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
       - [_laneq_s32, int32x4_t, int32x2_t, '2', '[N as u32, N as u32]']
@@ -1650,6 +1658,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [q_laneq_s64, int64x2_t, '1', '[N as u32, N as u32]']
       - [q_laneq_u64, uint64x2_t, '1', '[N as u32, N as u32]']
@@ -1670,6 +1679,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [q_lane_s64, int64x1_t, int64x2_t]
       - [q_lane_u64, uint64x1_t, uint64x2_t]
@@ -1732,6 +1742,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - int8x8_t
       - int16x8_t
@@ -1756,6 +1767,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - int8x16_t
       - uint8x16_t
@@ -1777,6 +1789,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - int16x4_t
       - int32x4_t
@@ -1804,6 +1817,7 @@ intrinsics:
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
     compose:
@@ -1825,6 +1839,7 @@ intrinsics:
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x8_t
     compose:
@@ -1846,6 +1861,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - int32x2_t
       - uint32x2_t
@@ -1867,6 +1883,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const N: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - int64x2_t
       - uint64x2_t
@@ -6138,6 +6155,7 @@ intrinsics:
       - *neon-unstable-i8mm
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int32x2_t, uint8x8_t, int8x8_t]
       - [int32x4_t, uint8x16_t, int8x16_t]
@@ -7076,6 +7094,7 @@ intrinsics:
       - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int32x2_t, int8x8_t]
       - [int32x4_t, int8x16_t]
@@ -7100,6 +7119,7 @@ intrinsics:
       - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [uint32x2_t, uint8x8_t]
       - [uint32x4_t, uint8x16_t]
@@ -7473,6 +7493,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
     compose:
@@ -7497,6 +7518,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       - float16x4_t
     compose:
@@ -8494,7 +8516,6 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
-    big_endian_inverse: true
     types:
       - [poly64x1_t, int32x2_t]
       - [poly64x1_t, uint32x2_t]
@@ -8790,7 +8811,6 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
-    big_endian_inverse: true
     types:
       # non-q
       - [float32x2_t, float16x4_t]
@@ -8873,7 +8893,6 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
-    big_endian_inverse: true
     types:
       - [poly64x1_t, float16x4_t]
       - [float16x4_t, poly64x1_t]
@@ -8898,6 +8917,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, "[3, 2, 1, 0]"]
       - [float16x8_t, "[3, 2, 1, 0, 7, 6, 5, 4]"]
@@ -9187,7 +9207,6 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
-    big_endian_inverse: true
     types:
       - ["u64", int8x8_t]
       - ["u64", int16x4_t]
@@ -9216,7 +9235,6 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
-    big_endian_inverse: true
     types:
       - ["u64", float16x4_t]
     compose:
@@ -9234,7 +9252,6 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
-    big_endian_inverse: true
     types:
       - ["u64", poly64x1_t]
     compose:
@@ -9253,6 +9270,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const LANE: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - ["i8", int8x8_t, '3']
       - ["i16", int16x4_t, '2']
@@ -9293,6 +9311,7 @@ intrinsics:
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - ["f16", float16x4_t, '2']
       - ["f16", float16x8_t, '3']
@@ -9314,6 +9333,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const LANE: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - ["i64", int64x1_t, int64x1_t]
       - ["u64", uint64x1_t, uint64x1_t]
@@ -9335,6 +9355,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const LANE: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - ["p64", poly64x1_t, poly64x1_t]
     compose:
@@ -9355,6 +9376,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
     static_defs: ['const LANE: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - ["p64", poly64x2_t, poly64x2_t]
     compose:
@@ -9681,6 +9703,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, int8x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]']
       - [int16x4_t, int16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]']
@@ -9725,6 +9748,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, float16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]']
       - [float16x8_t, float16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]']
@@ -9754,6 +9778,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]']
       - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]']
@@ -9783,6 +9808,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x16_t, int8x16x2_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]', '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]']
       - [int16x8_t, int16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]']
@@ -9818,6 +9844,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]']
       - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]']
@@ -9847,6 +9874,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, int8x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]']
       - [int16x4_t, int16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]']
@@ -9882,6 +9910,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, float16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]']
       - [float16x8_t, float16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]']
@@ -9910,6 +9939,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [int8x8_t, int8x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]']
       - [int16x4_t, int16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]']
@@ -9954,6 +9984,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, float16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]']
       - [float16x8_t, float16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]']
@@ -9983,6 +10014,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [float32x2_t, float32x2x2_t, '[0, 2]', '[1, 3]']
       - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]']
@@ -12502,6 +12534,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - [float32x2_t, float32x4_t, '[0, 1, 2, 3]']
       - [poly8x8_t, poly8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
@@ -12529,6 +12562,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint8x16_t
     compose:
@@ -12551,6 +12585,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - uint8x16_t
     compose:
@@ -12573,6 +12608,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [uint8x16_t, "aesmc"]
     compose:
@@ -12595,6 +12631,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [uint8x16_t, "aesimc"]
     compose:
@@ -12617,6 +12654,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [u32, "sha1h"]
     compose:
@@ -12639,6 +12677,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [u32, "sha1c", "uint32x4_t"]
     compose:
@@ -12661,6 +12700,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [u32, "sha1m", "uint32x4_t"]
     compose:
@@ -12683,6 +12723,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [u32, "sha1p", "uint32x4_t"]
     compose:
@@ -12705,6 +12746,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [uint32x4_t, "sha1su0"]
     compose:
@@ -12727,6 +12769,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [uint32x4_t, "sha1su1"]
     compose:
@@ -12749,6 +12792,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [uint32x4_t, "sha256h"]
     compose:
@@ -12771,6 +12815,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [uint32x4_t, "sha256h2"]
     compose:
@@ -12793,6 +12838,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [uint32x4_t, "sha256su0"]
     compose:
@@ -12815,6 +12861,7 @@ intrinsics:
       - *neon-cfg-arm-unstable
       - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
     safety: safe
+    big_endian_inverse: true
     types:
       - [uint32x4_t, "sha256su1"]
     compose:
@@ -13052,6 +13099,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - int8x8_t
       - int16x4_t
@@ -13076,6 +13124,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - uint8x8_t
       - uint16x4_t
@@ -13100,6 +13149,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
     compose:
@@ -13122,6 +13172,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - int8x8_t
       - int16x4_t
@@ -13146,6 +13197,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - uint8x8_t
       - uint16x4_t
@@ -13170,6 +13222,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - float32x2_t
     compose:
@@ -13290,6 +13343,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - int8x8_t
       - int16x4_t
@@ -14140,6 +14194,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, float16x8_t]
     compose:
@@ -14157,6 +14212,7 @@ intrinsics:
       - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, float16x8_t, 'low', "[0, 1, 2, 3]"]
       - [float16x4_t, float16x8_t, 'high', "[4, 5, 6, 7]"]
@@ -14177,6 +14233,7 @@ intrinsics:
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
+    big_endian_inverse: true
     types:
       - [float16x4_t, f16, '_lane_f16',  '2']
       - [float16x8_t, f16, 'q_lane_f16', '3']
@@ -14928,6 +14985,7 @@ intrinsics:
     arguments: ["v: {neon_type[1]}"]
     return_type: "{type[2]}"
     safety: safe
+    big_endian_inverse: true
     static_defs: ['const IMM5: i32']
     attr:
       - *neon-v7
@@ -14966,6 +15024,7 @@ intrinsics:
     arguments: ["v: {neon_type[1]}"]
     return_type: "{type[2]}"
     safety: safe
+    big_endian_inverse: true
     static_defs: ['const IMM5: i32']
     attr:
       - *neon-v7
@@ -15012,6 +15071,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - ['vget_high_s64', 'int64x2_t',   'int64x1_t',   'vmov', 'ext',  'int64x1_t([simd_extract!(a, 1)])']
       - ['vget_high_u64', 'uint64x2_t',  'uint64x1_t',  'vmov', 'ext',  'uint64x1_t([simd_extract!(a, 1)])']
@@ -15029,6 +15089,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - ['vget_low_s64', 'int64x2_t',   'int64x1_t',   'int64x1_t([simd_extract!(a, 0)])']
       - ['vget_low_u64', 'uint64x2_t',  'uint64x1_t',  'uint64x1_t([simd_extract!(a, 0)])']
@@ -15047,6 +15108,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - ['vget_high_s8', 'int8x16_t', 'int8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]']
       - ['vget_high_u8', 'uint8x16_t', 'uint8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]']
@@ -15070,6 +15132,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - ['vget_low_s8', 'int8x16_t', 'int8x8_t', '[0, 1, 2, 3, 4, 5, 6, 7]']
       - ['vget_low_u8', 'uint8x16_t', 'uint8x8_t','[0, 1, 2, 3, 4, 5, 6, 7]']
@@ -15222,6 +15285,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - ['vrev16_s8', 'int8x8_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6]']
       - ['vrev16q_s8', 'int8x16_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]']

From 0415e2fda799ef84775a26359f11e63061de50e4 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Sun, 10 May 2026 07:09:31 +0530
Subject: [PATCH 61/78] Disable some `assert_instr` tests in big-endian

---
 .../core_arch/src/aarch64/neon/generated.rs   | 24 +++++++++----------
 .../spec/neon/aarch64.spec.yml                | 10 ++++----
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index 0672f66c9e08b..8d701d9b88056 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -9852,7 +9852,7 @@ pub fn vfmsd_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmov))]
+#[cfg_attr(test, assert_instr(nop))]
 pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
     unsafe { float64x1_t([simd_extract!(a, 1)]) }
 }
@@ -9862,7 +9862,7 @@ pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmov))]
+#[cfg_attr(test, assert_instr(nop))]
 pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
     unsafe {
         let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
@@ -16356,7 +16356,7 @@ pub fn vqdmlal_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t)
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
@@ -16367,7 +16367,7 @@ pub fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
@@ -16378,7 +16378,7 @@ pub fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlals_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
@@ -16389,7 +16389,7 @@ pub fn vqdmlals_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlals_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
@@ -16400,7 +16400,7 @@ pub fn vqdmlals_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 {
     let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
@@ -16522,7 +16522,7 @@ pub fn vqdmlsl_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t)
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
@@ -16533,7 +16533,7 @@ pub fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
@@ -16544,7 +16544,7 @@ pub fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
@@ -16555,7 +16555,7 @@ pub fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
@@ -16566,7 +16566,7 @@ pub fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 {
     let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index dc83635a0ef3a..03fea5e0a4af3 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -7083,7 +7083,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -7111,7 +7111,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -7197,7 +7197,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl]]}]]
       - *neon-stable
     safety: safe
     types:
@@ -7225,7 +7225,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-stable
     static_defs: ['const LANE: i32']
@@ -14186,7 +14186,7 @@ intrinsics:
     safety: safe
     big_endian_inverse: true
     types:
-      - ['vget_high_f64', 'float64x2_t', 'float64x1_t', 'fmov', 'float64x1_t([simd_extract!(a, 1)])']
+      - ['vget_high_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 1)])']
       - ['vget_low_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 0)])']
     compose:
       - Identifier: ['{type[4]}', UnsafeSymbol]

From b13eb7790b4bd614bbfc8159d490b9fbc1bd55b0 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Thu, 30 Apr 2026 05:26:34 +0530
Subject: [PATCH 62/78] Fix `_mm_sm3rnds2_epi32`

---
 library/stdarch/crates/intrinsic-test/missing_x86.txt       | 3 ---
 library/stdarch/crates/intrinsic-test/src/x86/constraint.rs | 5 ++++-
 library/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/library/stdarch/crates/intrinsic-test/missing_x86.txt b/library/stdarch/crates/intrinsic-test/missing_x86.txt
index f88a125bfdff8..6438dde18762b 100644
--- a/library/stdarch/crates/intrinsic-test/missing_x86.txt
+++ b/library/stdarch/crates/intrinsic-test/missing_x86.txt
@@ -45,9 +45,6 @@ _mm_set1_pch
 _tpause
 _umwait
 
-# IMM8 must be an even number in the range `0..=62`
-_mm_sm3rnds2_epi32
-
 # SDE ERROR: Cannot execute XGETBV with ECX != 0
 _xgetbv
 
diff --git a/library/stdarch/crates/intrinsic-test/src/x86/constraint.rs b/library/stdarch/crates/intrinsic-test/src/x86/constraint.rs
index 72f5da3b3faf3..608ffdd1eed96 100644
--- a/library/stdarch/crates/intrinsic-test/src/x86/constraint.rs
+++ b/library/stdarch/crates/intrinsic-test/src/x86/constraint.rs
@@ -1,7 +1,10 @@
 use crate::common::constraint::Constraint;
 
-pub fn map_constraints(imm_type: &String, imm_width: u32) -> Option<Constraint> {
+pub fn map_constraints(fn_name: &str, imm_type: &String, imm_width: u32) -> Option<Constraint> {
     if imm_width > 0 {
+        if fn_name == "_mm_sm3rnds2_epi32" {
+            return Some(Constraint::Set((0..64).step_by(2).collect()));
+        }
         let max: i64 = 2i64.pow(imm_width);
         return Some(Constraint::Range(0..max));
     }
diff --git a/library/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs b/library/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
index 681b1a3c52743..6006d7919f875 100644
--- a/library/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
+++ b/library/stdarch/crates/intrinsic-test/src/x86/xml_parser.rs
@@ -99,7 +99,7 @@ fn xml_to_intrinsic(
             } else {
                 param.imm_width
             };
-            let constraint = map_constraints(&param.imm_type, effective_imm_width);
+            let constraint = map_constraints(&name, &param.imm_type, effective_imm_width);
             let arg = Argument::<X86IntrinsicType>::new(
                 i,
                 param.var_name.clone(),

From deaa5091fdc5d5e84a3f43b33e00127d4706ac56 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Mon, 11 May 2026 05:11:32 +0530
Subject: [PATCH 63/78] Disable some tests in x86 due to CI failures

---
 .../crates/intrinsic-test/missing_x86.txt     | 23 +++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/library/stdarch/crates/intrinsic-test/missing_x86.txt b/library/stdarch/crates/intrinsic-test/missing_x86.txt
index 6438dde18762b..c7aabb95a84b0 100644
--- a/library/stdarch/crates/intrinsic-test/missing_x86.txt
+++ b/library/stdarch/crates/intrinsic-test/missing_x86.txt
@@ -49,16 +49,31 @@ _umwait
 _xgetbv
 
 # top bits are undefined, unclear how to test these
+_mm256_castph128_ph256
+_mm256_castps128_ps256
+_mm256_castpd128_pd256
 _mm256_castsi128_si256
+
+_mm512_castph128_ph512
+_mm512_castps128_ps512
+_mm512_castpd128_pd512
 _mm512_castsi128_si512
+
+_mm512_castph256_ph512
+_mm512_castps256_ps512
+_mm512_castpd256_pd512
 _mm512_castsi256_si512
 
 # Clang bug
-_mm256_extract_epi16
-_mm256_extract_epi8
 _mm512_mask_reduce_max_pd
 _mm512_mask_reduce_max_ps
 _mm512_mask_reduce_min_pd
 _mm512_mask_reduce_min_ps
-_mm_extract_epi16
-_mm_extract_epi8
+
+# Rounding errors in release mode
+_mm_maskz_fmadd_sd
+_mm_maskz_fmadd_ss
+_mm_maskz_fmsub_sd
+_mm_maskz_fmsub_ss
+_mm_maskz_fnmadd_sd
+_mm_maskz_fnmadd_ss

From 921b35bb6d1b872b9ffb36ac202d911cebea6a79 Mon Sep 17 00:00:00 2001
From: Chris Denton <chris@chrisdenton.dev>
Date: Mon, 11 May 2026 09:02:43 +0000
Subject: [PATCH 64/78] Implement OsStr::split_at

---
 library/core/src/wtf8.rs            |  37 ++++++++--
 library/std/src/ffi/os_str.rs       |  55 +++++++++++++++
 library/std/src/ffi/os_str/tests.rs | 102 ++++++++++++++++++++++++++++
 library/std/src/sys/os_str/bytes.rs |  26 ++++---
 library/std/src/sys/os_str/utf8.rs  |   8 ++-
 library/std/src/sys/os_str/wtf8.rs  |   5 ++
 6 files changed, 217 insertions(+), 16 deletions(-)

diff --git a/library/core/src/wtf8.rs b/library/core/src/wtf8.rs
index a0978c3dafb48..698e17a6b8e6d 100644
--- a/library/core/src/wtf8.rs
+++ b/library/core/src/wtf8.rs
@@ -454,25 +454,50 @@ impl Wtf8 {
     #[track_caller]
     #[inline]
     pub fn check_utf8_boundary(&self, index: usize) {
+        let Err(err) = self.try_check_utf8_boundary(index) else { return };
+        match err {
+            Utf8BoundaryError::NotABoundary => {
+                panic!("byte index {index} is not a codepoint boundary")
+            }
+            Utf8BoundaryError::OutOfBounds => panic!("byte index {index} is out of bounds"),
+            Utf8BoundaryError::BetweenSurrogates => {
+                panic!("byte index {index} lies between surrogate codepoints")
+            }
+        }
+    }
+
+    #[track_caller]
+    #[inline]
+    pub fn try_check_utf8_boundary(&self, index: usize) -> Result<(), Utf8BoundaryError> {
         if index == 0 {
-            return;
+            return Ok(());
         }
         match self.bytes.get(index) {
             Some(0xED) => (), // Might be a surrogate
-            Some(&b) if (b as i8) >= -0x40 => return,
-            Some(_) => panic!("byte index {index} is not a codepoint boundary"),
-            None if index == self.len() => return,
-            None => panic!("byte index {index} is out of bounds"),
+            Some(&b) if (b as i8) >= -0x40 => return Ok(()),
+            Some(_) => return Err(Utf8BoundaryError::NotABoundary),
+            None if index == self.len() => return Ok(()),
+            None => return Err(Utf8BoundaryError::OutOfBounds),
         }
         if self.bytes[index + 1] >= 0xA0 {
             // There's a surrogate after index. Now check before index.
             if index >= 3 && self.bytes[index - 3] == 0xED && self.bytes[index - 2] >= 0xA0 {
-                panic!("byte index {index} lies between surrogate codepoints");
+                return Err(Utf8BoundaryError::BetweenSurrogates);
             }
         }
+        Ok(())
     }
 }
 
+// This error type is only used temporarily to provide better panic messages
+// It does not implement Error.
+#[derive(Debug)]
+pub enum Utf8BoundaryError {
+    NotABoundary,
+    OutOfBounds,
+    BetweenSurrogates,
+}
+
 /// Copied from core::str::raw::slice_unchecked
 #[inline]
 unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 {
diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs
index f0cebb05c76a5..be606ae69d6d9 100644
--- a/library/std/src/ffi/os_str.rs
+++ b/library/std/src/ffi/os_str.rs
@@ -1055,6 +1055,61 @@ impl OsStr {
         OsString { inner: Buf::from_box(boxed) }
     }
 
+    /// Divides one string slice into two at an index.
+    ///
+    /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to the end of the string slice.
+    ///
+    /// The argument, `mid`, should be a byte offset from the start of the string.
+    /// It must also be on a valid `OsStr` boundary.
+    /// See [`split_at_checked`][Self::split_at_checked] for the definition of a valid boundary.
+    ///
+    /// Panics
+    ///
+    /// Panics if `mid` is not on a valid boundary, or if it is past the end of the last code point of the string slice.
+    /// For a non-panicking alternative see [`split_at_checked`][Self::split_at_checked].
+    #[unstable(feature = "os_str_split_at", issue = "none")]
+    pub fn split_at(&self, mid: usize) -> (&OsStr, &OsStr) {
+        self.inner.check_public_boundary(mid);
+
+        // SAFETY: we've checked it's in bounds and a valid boundary
+        unsafe { self.split_at_unchecked(mid) }
+    }
+
+    /// Divides one string slice into two at an index.
+    ///
+    /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to the end of the string slice.
+    ///
+    /// The argument, `mid`, should be a valid byte offset from the start of the string.
+    /// It must also be on a valid `OsStr` boundary.
+    /// The method returns `None` if that’s not the case.
+    /// A valid `OsStr` boundary is one of:
+    /// - The start of the string
+    /// - The end of the string
+    /// - The start of a valid non-empty UTF-8 substring
+    /// - Immediately follows a valid non-empty UTF-8 substring
+    #[unstable(feature = "os_str_split_at", issue = "none")]
+    pub fn split_at_checked(&self, mid: usize) -> Option<(&OsStr, &OsStr)> {
+        self.inner.try_check_public_boundary(mid)?;
+
+        // SAFETY: we've checked it's in bounds and a valid boundary
+        unsafe { Some(self.split_at_unchecked(mid)) }
+    }
+
+    /// Splits an `OsStr` without checking if `mid` is a valid boundary.
+    /// You should use `split_at` or `split_at_checked` instead.
+    ///
+    /// # Safety
+    ///
+    /// Any caller must ensure `mid` is within bounds and lies on
+    /// a valid `OsStr` boundary for the platform.
+    unsafe fn split_at_unchecked(&self, mid: usize) -> (&OsStr, &OsStr) {
+        // SAFETY: it's up to the caller to ensure this is safe.
+        unsafe {
+            let (first, second) = self.as_encoded_bytes().split_at_unchecked(mid);
+            (Self::from_encoded_bytes_unchecked(first), Self::from_encoded_bytes_unchecked(second))
+        }
+    }
+
     /// Converts an OS string slice to a byte slice.  To convert the byte slice back into an OS
     /// string slice, use the [`OsStr::from_encoded_bytes_unchecked`] function.
     ///
diff --git a/library/std/src/ffi/os_str/tests.rs b/library/std/src/ffi/os_str/tests.rs
index 3474f0ab50684..1075c43e0b3bf 100644
--- a/library/std/src/ffi/os_str/tests.rs
+++ b/library/std/src/ffi/os_str/tests.rs
@@ -289,6 +289,108 @@ fn slice_surrogate_edge() {
     assert_eq!(post_crab.slice_encoded_bytes(4..), surrogate);
 }
 
+#[test]
+fn os_str_slice_at() {
+    #[track_caller]
+    fn slice_at_ok(input: &OsStr, index: usize, expected: (&str, &str)) {
+        let expected = (OsStr::new(expected.0), OsStr::new(expected.1));
+        assert_eq!(input.split_at(index), expected);
+        assert_eq!(input.split_at_checked(index), Some(expected));
+    }
+
+    let os_str = OsStr::new("123გ🦀4");
+    slice_at_ok(os_str, 0, ("", "123გ🦀4"));
+    slice_at_ok(os_str, 1, ("1", "23გ🦀4"));
+    slice_at_ok(os_str, 2, ("12", "3გ🦀4"));
+    slice_at_ok(os_str, 3, ("123", "გ🦀4"));
+    slice_at_ok(os_str, 6, ("123გ", "🦀4"));
+    slice_at_ok(os_str, 10, ("123გ🦀", "4"));
+    slice_at_ok(os_str, 11, ("123გ🦀4", ""));
+
+    // Invalid boundaries should fail.
+    assert!(os_str.split_at_checked(4).is_none());
+    assert!(os_str.split_at_checked(5).is_none());
+    assert!(os_str.split_at_checked(7).is_none());
+    assert!(os_str.split_at_checked(8).is_none());
+    assert!(os_str.split_at_checked(9).is_none());
+    // Out of bounds
+    assert!(os_str.split_at_checked(12).is_none());
+}
+
+#[test]
+#[should_panic]
+fn os_str_slice_at_out_of_bounds() {
+    let crab = OsStr::new("🦀");
+    let _ = crab.split_at(5);
+}
+
+#[test]
+#[should_panic]
+fn os_str_slice_at_mid_char() {
+    let crab = OsStr::new("🦀");
+    let _ = crab.split_at(2);
+}
+
+#[cfg(unix)]
+#[test]
+fn os_str_slice_at_unix() {
+    use crate::os::unix::ffi::OsStrExt;
+
+    let broken_utf8 = OsStr::from_bytes(&"🦀".as_bytes()[..3]);
+    let invalid = OsStr::from_bytes(b"\xFF");
+
+    // Check that broken UTF-8 isn't treated as if it's valid.
+    let mut os_string = invalid.to_os_string();
+    os_string.push(broken_utf8);
+    assert_eq!(os_string.split_at_checked(1), None);
+
+    // We should be able to split on ascii with invalid UTF-8 between
+    let os_string = OsStr::from_bytes(b"a\xFFa");
+    assert_eq!(os_string.split_at_checked(1), Some(("a".as_ref(), OsStr::from_bytes(b"\xFFa"))));
+    assert_eq!(os_string.split_at_checked(2), Some((OsStr::from_bytes(b"a\xFF"), "a".as_ref(),)));
+
+    let os_string = OsStr::from_bytes(&"abc🦀".as_bytes()[..6]);
+    assert_eq!(
+        os_string.split_at_checked(3),
+        Some(("abc".as_ref(), OsStr::from_bytes(b"\xF0\x9F\xA6")))
+    );
+
+    let mut os_string = invalid.to_os_string();
+    os_string.push("🦀");
+    assert_eq!(os_string.split_at_checked(1), Some((invalid, "🦀".as_ref())));
+}
+
+#[test]
+#[cfg(windows)]
+fn os_str_slice_at_windows() {
+    use crate::os::windows::ffi::OsStringExt;
+
+    // slicing between unpaired surrogates should not be possible
+    // checking is implemented as a loop so we're agnostic towards
+    // the internal encoding
+    let os_string = OsString::from_wide(&[0xD800, 0xD800]);
+    for i in 1..os_string.len() {
+        assert_eq!(os_string.split_at_checked(i), None);
+    }
+    // For completeness, check that splitting at the start and end still works.
+    assert!(os_string.split_at_checked(0).is_some());
+    assert!(os_string.split_at_checked(os_string.len()).is_some());
+
+    // check that slicing before and after unpaired surrogates work
+    let surrogate = OsString::from_wide(&[0xD800]);
+
+    let mut os_string = surrogate.clone();
+    os_string.push("🦀");
+    assert_eq!(
+        os_string.split_at_checked(surrogate.len()),
+        Some((surrogate.as_ref(), "🦀".as_ref()))
+    );
+
+    let mut os_string = OsString::from("🦀");
+    os_string.push(&surrogate);
+    assert_eq!(os_string.split_at_checked("🦀".len()), Some(("🦀".as_ref(), surrogate.as_ref())));
+}
+
 #[test]
 fn clone_to_uninit() {
     let a = OsStr::new("hello.txt");
diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs
index 5482663ef0079..a57da01a5d85d 100644
--- a/library/std/src/sys/os_str/bytes.rs
+++ b/library/std/src/sys/os_str/bytes.rs
@@ -238,16 +238,24 @@ impl Slice {
     #[track_caller]
     #[inline]
     pub fn check_public_boundary(&self, index: usize) {
+        if self.try_check_public_boundary(index).is_none() {
+            panic!("byte index {index} is not an OsStr boundary");
+        }
+    }
+
+    #[track_caller]
+    #[inline]
+    pub fn try_check_public_boundary(&self, index: usize) -> Option<()> {
         if index == 0 || index == self.inner.len() {
-            return;
+            return Some(());
         }
         if index < self.inner.len()
             && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii())
         {
-            return;
+            return Some(());
         }
 
-        slow_path(&self.inner, index);
+        return slow_path(&self.inner, index);
 
         /// We're betting that typical splits will involve an ASCII character.
         ///
@@ -255,26 +263,26 @@ impl Slice {
         /// better assembly.
         #[track_caller]
         #[inline(never)]
-        fn slow_path(bytes: &[u8], index: usize) {
-            let (before, after) = bytes.split_at(index);
+        fn slow_path(bytes: &[u8], index: usize) -> Option<()> {
+            let (before, after) = bytes.split_at_checked(index)?;
 
             // UTF-8 takes at most 4 bytes per codepoint, so we don't
             // need to check more than that.
             let after = after.get(..4).unwrap_or(after);
             match str::from_utf8(after) {
-                Ok(_) => return,
-                Err(err) if err.valid_up_to() != 0 => return,
+                Ok(_) => return Some(()),
+                Err(err) if err.valid_up_to() != 0 => return Some(()),
                 Err(_) => (),
             }
 
             for len in 2..=4.min(index) {
                 let before = &before[index - len..];
                 if str::from_utf8(before).is_ok() {
-                    return;
+                    return Some(());
                 }
             }
 
-            panic!("byte index {index} is not an OsStr boundary");
+            None
         }
     }
 
diff --git a/library/std/src/sys/os_str/utf8.rs b/library/std/src/sys/os_str/utf8.rs
index a324a478325e6..289f58aa480f7 100644
--- a/library/std/src/sys/os_str/utf8.rs
+++ b/library/std/src/sys/os_str/utf8.rs
@@ -224,10 +224,16 @@ impl Slice {
         Slice::from_str(unsafe { str::from_utf8_unchecked(s) })
     }
 
+    #[track_caller]
+    #[inline]
+    pub fn try_check_public_boundary(&self, index: usize) -> Option<()> {
+        if self.inner.is_char_boundary(index) { Some(()) } else { None }
+    }
+
     #[track_caller]
     #[inline]
     pub fn check_public_boundary(&self, index: usize) {
-        if !self.inner.is_char_boundary(index) {
+        if self.try_check_public_boundary(index).is_none() {
             panic!("byte index {index} is not an OsStr boundary");
         }
     }
diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs
index 1f130d91cf393..9a32ab3f3ea12 100644
--- a/library/std/src/sys/os_str/wtf8.rs
+++ b/library/std/src/sys/os_str/wtf8.rs
@@ -240,6 +240,11 @@ impl Slice {
         unsafe { mem::transmute(Wtf8::from_bytes_unchecked(s)) }
     }
 
+    #[inline]
+    pub fn try_check_public_boundary(&self, index: usize) -> Option<()> {
+        self.inner.try_check_utf8_boundary(index).ok()
+    }
+
     #[track_caller]
     #[inline]
     pub fn check_public_boundary(&self, index: usize) {

From 53a4efa5fb1f45e42f493e74029769db7d63d035 Mon Sep 17 00:00:00 2001
From: Waffle Lapkin <waffle.lapkin@gmail.com>
Date: Fri, 15 May 2026 11:37:16 +0200
Subject: [PATCH 65/78] disallow `target_has_atomic_primitive_alignment` with
 no value

---
 compiler/rustc_session/src/config/cfg.rs    | 7 ++-----
 tests/ui/check-cfg/well-known-values.stderr | 2 +-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/compiler/rustc_session/src/config/cfg.rs b/compiler/rustc_session/src/config/cfg.rs
index 74641fd629bc9..d16ab59a02d9e 100644
--- a/compiler/rustc_session/src/config/cfg.rs
+++ b/compiler/rustc_session/src/config/cfg.rs
@@ -485,13 +485,10 @@ impl CheckCfg {
             sym::integer(64usize),
             sym::integer(128usize),
         ];
-        for sym in [
-            sym::target_has_atomic,
-            sym::target_has_atomic_primitive_alignment,
-            sym::target_has_atomic_load_store,
-        ] {
+        for sym in [sym::target_has_atomic, sym::target_has_atomic_load_store] {
             ins!(sym, no_values).extend(atomic_values);
         }
+        ins!(sym::target_has_atomic_primitive_alignment, empty_values).extend(atomic_values);
 
         ins!(sym::target_thread_local, no_values);
 
diff --git a/tests/ui/check-cfg/well-known-values.stderr b/tests/ui/check-cfg/well-known-values.stderr
index 345fd345a6dba..47ed9891a1a6c 100644
--- a/tests/ui/check-cfg/well-known-values.stderr
+++ b/tests/ui/check-cfg/well-known-values.stderr
@@ -192,7 +192,7 @@ warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`
 LL |     target_has_atomic_primitive_alignment = "_UNEXPECTED_VALUE",
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    |
-   = note: expected values for `target_has_atomic_primitive_alignment` are: (none), `128`, `16`, `32`, `64`, `8`, and `ptr`
+   = note: expected values for `target_has_atomic_primitive_alignment` are: `128`, `16`, `32`, `64`, `8`, and `ptr`
    = note: see <https://doc.rust-lang.org/nightly/rustc/check-cfg.html> for more information about checking conditional configuration
 
 warning: unexpected `cfg` condition value: `_UNEXPECTED_VALUE`

From 2f69883daab6b2420afb27230e05600ea530cf30 Mon Sep 17 00:00:00 2001
From: Waffle Lapkin <waffle.lapkin@gmail.com>
Date: Fri, 15 May 2026 11:37:16 +0200
Subject: [PATCH 66/78] update list of cfgs in `src/doc/rustc`

---
 src/doc/rustc/src/check-cfg.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/doc/rustc/src/check-cfg.md b/src/doc/rustc/src/check-cfg.md
index a3e83d8e1d298..ef791b57895ab 100644
--- a/src/doc/rustc/src/check-cfg.md
+++ b/src/doc/rustc/src/check-cfg.md
@@ -99,7 +99,7 @@ the need to specify them manually.
 Well known names and values are implicitly added as long as at least one `--check-cfg` argument
 is present.
 
-As of `2025-01-02T`, the list of known names is as follows:
+As of `2026-05-15T`, the list of known names is as follows:
 
 <!--- See CheckCfg::fill_well_known in compiler/rustc_session/src/config.rs -->
 
@@ -122,6 +122,7 @@ As of `2025-01-02T`, the list of known names is as follows:
  - `target_endian`
  - `target_env`
  - `target_family`
+ - `target_object_format`
  - `target_feature`
  - `target_has_atomic`
  - `target_has_atomic_primitive_alignment`
@@ -131,6 +132,7 @@ As of `2025-01-02T`, the list of known names is as follows:
  - `target_thread_local`
  - `target_vendor`
  - `ub_checks`
+ - `contract_checks`
  - `unix`
  - `windows`
 

From 638a775642f5eddcd720826797085f1e8412614d Mon Sep 17 00:00:00 2001
From: Niklas Fiekas <niklas.fiekas@backscattering.de>
Date: Fri, 15 May 2026 14:53:18 +0200
Subject: [PATCH 67/78] Add pext/pdep as aliases for extract_bits/deposit_bits

So that the methods will be found when searching for the corresponding
intrinsics.

Tracking issue: https://github.com/rust-lang/rust/issues/149069
---
 library/core/src/num/uint_macros.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs
index 8dc668584b658..0e6fa12702c46 100644
--- a/library/core/src/num/uint_macros.rs
+++ b/library/core/src/num/uint_macros.rs
@@ -710,6 +710,7 @@ macro_rules! uint_impl {
         /// assert_eq!(n.extract_bits(0b0010_0100), 0b0000_0011);
         /// assert_eq!(n.extract_bits(0xF0), 0b0000_1011);
         /// ```
+        #[doc(alias = "pext")]
         #[unstable(feature = "uint_gather_scatter_bits", issue = "149069")]
         #[must_use = "this returns the result of the operation, \
                       without modifying the original"]
@@ -727,6 +728,7 @@ macro_rules! uint_impl {
         /// assert_eq!(n.deposit_bits(0b0101_0101), 0b0101_0001);
         /// assert_eq!(n.deposit_bits(0xF0), 0b1101_0000);
         /// ```
+        #[doc(alias = "pdep")]
         #[unstable(feature = "uint_gather_scatter_bits", issue = "149069")]
         #[must_use = "this returns the result of the operation, \
                       without modifying the original"]

From 4950a3c1a24149f1362259f44a40c61e17a209a8 Mon Sep 17 00:00:00 2001
From: Bowluckie <bowluckie@gmail.com>
Date: Sat, 16 May 2026 10:35:30 +1000
Subject: [PATCH 68/78] updated map.rs .values() method to use assert instead
 of println!

---
 library/std/src/collections/hash/map.rs | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/library/std/src/collections/hash/map.rs b/library/std/src/collections/hash/map.rs
index 09692b0d290d8..b1ed80b82bd1a 100644
--- a/library/std/src/collections/hash/map.rs
+++ b/library/std/src/collections/hash/map.rs
@@ -553,17 +553,16 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
     /// # Examples
     ///
     /// ```
-    /// use std::collections::HashMap;
-    ///
-    /// let map = HashMap::from([
+    /// let map: HashMap<&str, i32> = HashMap::from([
     ///     ("a", 1),
     ///     ("b", 2),
     ///     ("c", 3),
     /// ]);
     ///
-    /// for val in map.values() {
-    ///     println!("{val}");
-    /// }
+    /// let mut values: Vec<_> = map.values().copied().collect();
+    /// values.sort();
+    ///
+    /// assert_eq!(values, vec![1, 2, 3]);
     /// ```
     ///
     /// # Performance

From 20f68aebb195fea4c3a8614a7aa6ee491d35027f Mon Sep 17 00:00:00 2001
From: Bowluckie <bowluckie@gmail.com>
Date: Sat, 16 May 2026 11:04:33 +1000
Subject: [PATCH 69/78] Updated `map.rs` functions to all use asserts instread
 of `println!` in documentations

Updated `.keys()`, `.values()`, `.iter()` and `.iter_mut()`.

Instead of using print statements which shows no clear effect  by just reading the documentation
---
 library/std/src/collections/hash/map.rs | 33 ++++++++++++++-----------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/library/std/src/collections/hash/map.rs b/library/std/src/collections/hash/map.rs
index b1ed80b82bd1a..edb44e85be1a5 100644
--- a/library/std/src/collections/hash/map.rs
+++ b/library/std/src/collections/hash/map.rs
@@ -491,17 +491,16 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
     /// # Examples
     ///
     /// ```
-    /// use std::collections::HashMap;
-    ///
-    /// let map = HashMap::from([
+    /// let map: HashMap<&str, i32> = HashMap::from([
     ///     ("a", 1),
     ///     ("b", 2),
     ///     ("c", 3),
     /// ]);
     ///
-    /// for key in map.keys() {
-    ///     println!("{key}");
-    /// }
+    /// let mut values: Vec<_> = map.values().copied().collect();
+    /// values.sort();
+    ///
+    /// assert_eq!(values, vec!["a", "b", "c"]);
     /// ```
     ///
     /// # Performance
@@ -590,12 +589,12 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
     /// ]);
     ///
     /// for val in map.values_mut() {
-    ///     *val = *val + 10;
+    ///     *val += 10;
     /// }
     ///
-    /// for val in map.values() {
-    ///     println!("{val}");
-    /// }
+    /// assert_eq!(map.get("a"), Some(&11));
+    /// assert_eq!(map.get("b"), Some(&12));
+    /// assert_eq!(map.get("c"), Some(&13));
     /// ```
     ///
     /// # Performance
@@ -655,9 +654,13 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
     ///     ("c", 3),
     /// ]);
     ///
-    /// for (key, val) in map.iter() {
-    ///     println!("key: {key} val: {val}");
+    /// let mut count = 0;
+    ///
+    /// for (_key, _val) in map.iter() {
+    ///     count += 1;
     /// }
+    ///
+    /// assert_eq!(count, 3);
     /// ```
     ///
     /// # Performance
@@ -690,9 +693,9 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
     ///     *val *= 2;
     /// }
     ///
-    /// for (key, val) in &map {
-    ///     println!("key: {key} val: {val}");
-    /// }
+    /// assert_eq!(map.get("a"), Some(&2));
+    /// assert_eq!(map.get("b"), Some(&4));
+    /// assert_eq!(map.get("c"), Some(&6));
     /// ```
     ///
     /// # Performance

From 760b3f40bf1be5f6fc5db79bd225345e7a0bd6cc Mon Sep 17 00:00:00 2001
From: Bowluckie <bowluckie@gmail.com>
Date: Sat, 16 May 2026 11:35:43 +1000
Subject: [PATCH 70/78] fixed some bugs in hashmap documentation comments

`.keys()` was using `.values()` in the comment and 2 other functions were missing `use std::collections::HashMap`
---
 library/std/src/collections/hash/map.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/library/std/src/collections/hash/map.rs b/library/std/src/collections/hash/map.rs
index edb44e85be1a5..33e5b348c3080 100644
--- a/library/std/src/collections/hash/map.rs
+++ b/library/std/src/collections/hash/map.rs
@@ -491,13 +491,15 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
     /// # Examples
     ///
     /// ```
+    /// use std::collections::HashMap;
+    /// 
     /// let map: HashMap<&str, i32> = HashMap::from([
     ///     ("a", 1),
     ///     ("b", 2),
     ///     ("c", 3),
     /// ]);
     ///
-    /// let mut values: Vec<_> = map.values().copied().collect();
+    /// let mut values: Vec<_> = map.keys().copied().collect();
     /// values.sort();
     ///
     /// assert_eq!(values, vec!["a", "b", "c"]);
@@ -552,6 +554,8 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
     /// # Examples
     ///
     /// ```
+    /// use std::collections::HashMap;
+    /// 
     /// let map: HashMap<&str, i32> = HashMap::from([
     ///     ("a", 1),
     ///     ("b", 2),

From 3e3baaf15e93413711415921446b4e3eaa6349b4 Mon Sep 17 00:00:00 2001
From: Bowluckie <bowluckie@gmail.com>
Date: Sat, 16 May 2026 12:38:21 +1000
Subject: [PATCH 71/78] removed extra space after `///`

at lines 495 and 558
---
 library/std/src/collections/hash/map.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/library/std/src/collections/hash/map.rs b/library/std/src/collections/hash/map.rs
index 33e5b348c3080..a7a9b7901d7f4 100644
--- a/library/std/src/collections/hash/map.rs
+++ b/library/std/src/collections/hash/map.rs
@@ -492,7 +492,7 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
     ///
     /// ```
     /// use std::collections::HashMap;
-    /// 
+    ///
     /// let map: HashMap<&str, i32> = HashMap::from([
     ///     ("a", 1),
     ///     ("b", 2),
@@ -555,7 +555,7 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
     ///
     /// ```
     /// use std::collections::HashMap;
-    /// 
+    ///
     /// let map: HashMap<&str, i32> = HashMap::from([
     ///     ("a", 1),
     ///     ("b", 2),

From 521b89a82311ae9cd05080fb00955b555c9e147f Mon Sep 17 00:00:00 2001
From: Guillaume Gomez <contact@guillaume-gomez.fr>
Date: Sun, 17 May 2026 02:50:46 +0200
Subject: [PATCH 72/78] Update `sysinfo` version to `0.39.2`

---
 Cargo.lock                    | 4 ++--
 src/bootstrap/Cargo.lock      | 4 ++--
 src/bootstrap/Cargo.toml      | 2 +-
 src/tools/opt-dist/Cargo.toml | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0f6368e4497cb..e16648dbef325 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5430,9 +5430,9 @@ dependencies = [
 
 [[package]]
 name = "sysinfo"
-version = "0.39.0"
+version = "0.39.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd9f9fe3d2b7b75cf4f2805e5b9926e8ac47146667b16b86298c4a8bf08cc469"
+checksum = "14311e7e9a03114cd4b65eedd54e8fed2945e17f08586ae97ef53bc0669f9581"
 dependencies = [
  "libc",
  "objc2-core-foundation",
diff --git a/src/bootstrap/Cargo.lock b/src/bootstrap/Cargo.lock
index e22fde79a8444..7c890e3f2004c 100644
--- a/src/bootstrap/Cargo.lock
+++ b/src/bootstrap/Cargo.lock
@@ -742,9 +742,9 @@ dependencies = [
 
 [[package]]
 name = "sysinfo"
-version = "0.39.0"
+version = "0.39.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd9f9fe3d2b7b75cf4f2805e5b9926e8ac47146667b16b86298c4a8bf08cc469"
+checksum = "14311e7e9a03114cd4b65eedd54e8fed2945e17f08586ae97ef53bc0669f9581"
 dependencies = [
  "libc",
  "memchr",
diff --git a/src/bootstrap/Cargo.toml b/src/bootstrap/Cargo.toml
index 363802093a13a..f4b89e8a28b0f 100644
--- a/src/bootstrap/Cargo.toml
+++ b/src/bootstrap/Cargo.toml
@@ -57,7 +57,7 @@ walkdir = "2.4"
 xz2 = "0.1"
 
 # Dependencies needed by the build-metrics feature
-sysinfo = { version = "0.39.0", default-features = false, optional = true, features = ["system"] }
+sysinfo = { version = "0.39.2", default-features = false, optional = true, features = ["system"] }
 
 # Dependencies needed by the `tracing` feature
 chrono = { version = "0.4", default-features = false, optional = true, features = ["now", "std"] }
diff --git a/src/tools/opt-dist/Cargo.toml b/src/tools/opt-dist/Cargo.toml
index d80e6caac0885..5893bf87b5c34 100644
--- a/src/tools/opt-dist/Cargo.toml
+++ b/src/tools/opt-dist/Cargo.toml
@@ -10,7 +10,7 @@ log = "0.4"
 anyhow = "1"
 humantime = "2"
 humansize = "2"
-sysinfo = { version = "0.39.0", default-features = false, features = ["disk"] }
+sysinfo = { version = "0.39.2", default-features = false, features = ["disk"] }
 fs_extra = "1"
 camino = "1"
 tar = "0.4.45"

From 62e595213d13524286a444f0767ee61bc1656125 Mon Sep 17 00:00:00 2001
From: "Tim (Theemathas Chirananthavat)" <theemathas@gmail.com>
Date: Sun, 17 May 2026 14:33:59 +0700
Subject: [PATCH 73/78] Remove `array::{try_,}from_trusted_iterator`

This is the only remaining place that uses `UncheckedIterator`. So,
replace `array::repeat` and array's `Clone` impl with a manual `from_fn`
call rather than going through unnecessary abstractions.

Removal of `UncheckedIterator` will be done in a later commit.
---
 library/core/src/array/mod.rs | 50 ++++++++++-------------------------
 1 file changed, 14 insertions(+), 36 deletions(-)

diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs
index ab10120fe5548..56f188cfa622a 100644
--- a/library/core/src/array/mod.rs
+++ b/library/core/src/array/mod.rs
@@ -11,7 +11,7 @@ use crate::convert::Infallible;
 use crate::error::Error;
 use crate::hash::{self, Hash};
 use crate::intrinsics::transmute_unchecked;
-use crate::iter::{TrustedLen, UncheckedIterator, repeat_n};
+use crate::iter::{TrustedLen, repeat_n};
 use crate::marker::Destruct;
 use crate::mem::{self, ManuallyDrop, MaybeUninit};
 use crate::ops::{
@@ -52,7 +52,10 @@ pub use iter::IntoIter;
 #[must_use = "cloning is often expensive and is not expected to have side effects"]
 #[stable(feature = "array_repeat", since = "1.91.0")]
 pub fn repeat<T: Clone, const N: usize>(val: T) -> [T; N] {
-    from_trusted_iterator(repeat_n(val, N))
+    let mut iter = repeat_n(val, N);
+    // SAFETY: Unless a panic occurs, from_fn will call the closure N times,
+    // and repeat_n's next() will return Some for N times.
+    from_fn(move |_| unsafe { iter.next().unwrap_unchecked() })
 }
 
 /// Creates an array where each element is produced by calling `f` with
@@ -464,7 +467,15 @@ trait SpecArrayClone: Clone {
 impl<T: Clone> SpecArrayClone for T {
     #[inline]
     default fn clone<const N: usize>(array: &[T; N]) -> [T; N] {
-        from_trusted_iterator(array.iter().cloned())
+        let mut ptr: *const T = array.as_ptr();
+        // SAFETY: Unless a panic occurs, from_fn will call the closure N times,
+        // so our pointer arithmetic will be in bounds for the N-element array.
+        // This works even for ZSTs, since in that case, add() is a no-op.
+        from_fn(move |_| unsafe {
+            let old = ptr;
+            ptr = ptr.add(1);
+            (&*old).clone()
+        })
     }
 }
 
@@ -877,39 +888,6 @@ impl<T, const N: usize> [T; N] {
     }
 }
 
-/// Populate an array from the first `N` elements of `iter`
-///
-/// # Panics
-///
-/// If the iterator doesn't actually have enough items.
-///
-/// By depending on `TrustedLen`, however, we can do that check up-front (where
-/// it easily optimizes away) so it doesn't impact the loop that fills the array.
-#[inline]
-fn from_trusted_iterator<T, const N: usize>(iter: impl UncheckedIterator<Item = T>) -> [T; N] {
-    try_from_trusted_iterator(iter.map(NeverShortCircuit)).0
-}
-
-#[inline]
-fn try_from_trusted_iterator<T, R, const N: usize>(
-    iter: impl UncheckedIterator<Item = R>,
-) -> ChangeOutputType<R, [T; N]>
-where
-    R: Try<Output = T>,
-    R::Residual: Residual<[T; N]>,
-{
-    assert!(iter.size_hint().0 >= N);
-    fn next<T>(mut iter: impl UncheckedIterator<Item = T>) -> impl FnMut(usize) -> T {
-        move |_| {
-            // SAFETY: We know that `from_fn` will call this at most N times,
-            // and we checked to ensure that we have at least that many items.
-            unsafe { iter.next_unchecked() }
-        }
-    }
-
-    try_from_fn(next(iter))
-}
-
 /// Version of [`try_from_fn`] using a passed-in slice in order to avoid
 /// needing to monomorphize for every array length.
 ///

From ea703e28fc2e8d977cd99167c43a1d838de5508a Mon Sep 17 00:00:00 2001
From: "Tim (Theemathas Chirananthavat)" <theemathas@gmail.com>
Date: Sun, 17 May 2026 15:06:12 +0700
Subject: [PATCH 74/78] Remove `UncheckedIterator`

The last usage of this trait was removed in a previous commit.
---
 library/core/src/array/drain.rs               |  3 +-
 library/core/src/iter/adapters/cloned.rs      | 15 +-------
 library/core/src/iter/adapters/map.rs         | 15 +-------
 library/core/src/iter/adapters/zip.rs         | 11 +-----
 library/core/src/iter/mod.rs                  |  1 -
 library/core/src/iter/sources/repeat_n.rs     |  4 +--
 library/core/src/iter/traits/mod.rs           |  2 --
 .../src/iter/traits/unchecked_iterator.rs     | 36 -------------------
 library/core/src/slice/iter.rs                |  4 +--
 library/core/src/slice/iter/macros.rs         | 12 +------
 10 files changed, 7 insertions(+), 96 deletions(-)
 delete mode 100644 library/core/src/iter/traits/unchecked_iterator.rs

diff --git a/library/core/src/array/drain.rs b/library/core/src/array/drain.rs
index b2ff54bdfa21c..329b0e18b982b 100644
--- a/library/core/src/array/drain.rs
+++ b/library/core/src/array/drain.rs
@@ -6,8 +6,7 @@ impl<'l, 'f, T, U, F: FnMut(T) -> U> Drain<'l, 'f, T, F> {
     /// This function returns a function that lets you index the given array in const.
     /// As implemented it can optimize better than iterators, and can be constified.
     /// It acts like a sort of guard (owns the array) and iterator combined, which can be implemented
-    /// as it is a struct that implements const fn;
-    /// in that regard it is somewhat similar to an array::Iter implementing `UncheckedIterator`.
+    /// as it is a struct that implements const fn.
     /// The only method you're really allowed to call is `next()`,
     /// anything else is more or less UB, hence this function being unsafe.
     /// Moved elements will not be dropped.
diff --git a/library/core/src/iter/adapters/cloned.rs b/library/core/src/iter/adapters/cloned.rs
index 54d132813e4db..c0b6a4053c825 100644
--- a/library/core/src/iter/adapters/cloned.rs
+++ b/library/core/src/iter/adapters/cloned.rs
@@ -2,7 +2,7 @@ use core::num::NonZero;
 
 use crate::iter::adapters::zip::try_get_unchecked;
 use crate::iter::adapters::{SourceIter, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
-use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen, UncheckedIterator};
+use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen};
 use crate::ops::Try;
 
 /// An iterator that clones the elements of an underlying iterator.
@@ -142,19 +142,6 @@ where
 {
 }
 
-impl<'a, I, T: 'a> UncheckedIterator for Cloned<I>
-where
-    I: UncheckedIterator<Item = &'a T>,
-    T: Clone,
-{
-    unsafe fn next_unchecked(&mut self) -> T {
-        // SAFETY: `Cloned` is 1:1 with the inner iterator, so if the caller promised
-        // that there's an element left, the inner iterator has one too.
-        let item = unsafe { self.it.next_unchecked() };
-        item.clone()
-    }
-}
-
 #[stable(feature = "default_iters", since = "1.70.0")]
 impl<I: Default> Default for Cloned<I> {
     /// Creates a `Cloned` iterator from the default value of `I`
diff --git a/library/core/src/iter/adapters/map.rs b/library/core/src/iter/adapters/map.rs
index f768f077aa27e..75f70dcd9b58f 100644
--- a/library/core/src/iter/adapters/map.rs
+++ b/library/core/src/iter/adapters/map.rs
@@ -1,7 +1,7 @@
 use crate::fmt;
 use crate::iter::adapters::zip::try_get_unchecked;
 use crate::iter::adapters::{SourceIter, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
-use crate::iter::{FusedIterator, InPlaceIterable, TrustedFused, TrustedLen, UncheckedIterator};
+use crate::iter::{FusedIterator, InPlaceIterable, TrustedFused, TrustedLen};
 use crate::num::NonZero;
 use crate::ops::Try;
 
@@ -194,19 +194,6 @@ where
 {
 }
 
-impl<B, I, F> UncheckedIterator for Map<I, F>
-where
-    I: UncheckedIterator,
-    F: FnMut(I::Item) -> B,
-{
-    unsafe fn next_unchecked(&mut self) -> B {
-        // SAFETY: `Map` is 1:1 with the inner iterator, so if the caller promised
-        // that there's an element left, the inner iterator has one too.
-        let item = unsafe { self.iter.next_unchecked() };
-        (self.f)(item)
-    }
-}
-
 #[doc(hidden)]
 #[unstable(feature = "trusted_random_access", issue = "none")]
 unsafe impl<I, F> TrustedRandomAccess for Map<I, F> where I: TrustedRandomAccess {}
diff --git a/library/core/src/iter/adapters/zip.rs b/library/core/src/iter/adapters/zip.rs
index c5e199c30821d..4b19c7ffc00f8 100644
--- a/library/core/src/iter/adapters/zip.rs
+++ b/library/core/src/iter/adapters/zip.rs
@@ -1,8 +1,6 @@
 use crate::cmp;
 use crate::fmt::{self, Debug};
-use crate::iter::{
-    FusedIterator, InPlaceIterable, SourceIter, TrustedFused, TrustedLen, UncheckedIterator,
-};
+use crate::iter::{FusedIterator, InPlaceIterable, SourceIter, TrustedFused, TrustedLen};
 use crate::num::NonZero;
 
 /// An iterator that iterates two other iterators simultaneously.
@@ -456,13 +454,6 @@ where
 {
 }
 
-impl<A, B> UncheckedIterator for Zip<A, B>
-where
-    A: UncheckedIterator,
-    B: UncheckedIterator,
-{
-}
-
 // Arbitrarily selects the left side of the zip iteration as extractable "source"
 // it would require negative trait bounds to be able to try both
 #[unstable(issue = "none", feature = "inplace_iteration")]
diff --git a/library/core/src/iter/mod.rs b/library/core/src/iter/mod.rs
index d532f1e568071..9ddafd47807f2 100644
--- a/library/core/src/iter/mod.rs
+++ b/library/core/src/iter/mod.rs
@@ -458,7 +458,6 @@ pub use self::traits::TrustedFused;
 pub use self::traits::TrustedLen;
 #[unstable(feature = "trusted_step", issue = "85731")]
 pub use self::traits::TrustedStep;
-pub(crate) use self::traits::UncheckedIterator;
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use self::traits::{
     DoubleEndedIterator, ExactSizeIterator, Extend, FromIterator, IntoIterator, Product, Sum,
diff --git a/library/core/src/iter/sources/repeat_n.rs b/library/core/src/iter/sources/repeat_n.rs
index 4cbaf41852142..0d4ced1b0c8a6 100644
--- a/library/core/src/iter/sources/repeat_n.rs
+++ b/library/core/src/iter/sources/repeat_n.rs
@@ -1,5 +1,5 @@
 use crate::fmt;
-use crate::iter::{FusedIterator, TrustedLen, UncheckedIterator};
+use crate::iter::{FusedIterator, TrustedLen};
 use crate::num::NonZero;
 use crate::ops::Try;
 
@@ -211,5 +211,3 @@ impl<A: Clone> FusedIterator for RepeatN<A> {}
 
 #[unstable(feature = "trusted_len", issue = "37572")]
 unsafe impl<A: Clone> TrustedLen for RepeatN<A> {}
-#[stable(feature = "iter_repeat_n", since = "1.82.0")]
-impl<A: Clone> UncheckedIterator for RepeatN<A> {}
diff --git a/library/core/src/iter/traits/mod.rs b/library/core/src/iter/traits/mod.rs
index b330e9ffe21ac..7639704d5799c 100644
--- a/library/core/src/iter/traits/mod.rs
+++ b/library/core/src/iter/traits/mod.rs
@@ -4,7 +4,6 @@ mod double_ended;
 mod exact_size;
 mod iterator;
 mod marker;
-mod unchecked_iterator;
 
 #[unstable(issue = "none", feature = "inplace_iteration")]
 pub use self::marker::InPlaceIterable;
@@ -12,7 +11,6 @@ pub use self::marker::InPlaceIterable;
 pub use self::marker::TrustedFused;
 #[unstable(feature = "trusted_step", issue = "85731")]
 pub use self::marker::TrustedStep;
-pub(crate) use self::unchecked_iterator::UncheckedIterator;
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use self::{
     accum::{Product, Sum},
diff --git a/library/core/src/iter/traits/unchecked_iterator.rs b/library/core/src/iter/traits/unchecked_iterator.rs
deleted file mode 100644
index ae4bfcad4e68f..0000000000000
--- a/library/core/src/iter/traits/unchecked_iterator.rs
+++ /dev/null
@@ -1,36 +0,0 @@
-use crate::iter::TrustedLen;
-
-/// [`TrustedLen`] cannot have methods, so this allows augmenting it.
-///
-/// It currently requires `TrustedLen` because it's unclear whether it's
-/// reasonably possible to depend on the `size_hint` of anything else.
-pub(crate) trait UncheckedIterator: TrustedLen {
-    /// Gets the next item from a non-empty iterator.
-    ///
-    /// Because there's always a value to return, that means it can return
-    /// the `Item` type directly, without wrapping it in an `Option`.
-    ///
-    /// # Safety
-    ///
-    /// This can only be called if `size_hint().0 != 0`, guaranteeing that
-    /// there's at least one item available.
-    ///
-    /// Otherwise (aka when `size_hint().1 == Some(0)`), this is UB.
-    ///
-    /// # Note to Implementers
-    ///
-    /// This has a default implementation using [`Option::unwrap_unchecked`].
-    /// That's probably sufficient if your `next` *always* returns `Some`,
-    /// such as for infinite iterators.  In more complicated situations, however,
-    /// sometimes there can still be `insertvalue`/`assume`/`extractvalue`
-    /// instructions remaining in the IR from the `Option` handling, at which
-    /// point you might want to implement this manually instead.
-    #[unstable(feature = "trusted_len_next_unchecked", issue = "37572")]
-    #[inline]
-    unsafe fn next_unchecked(&mut self) -> Self::Item {
-        let opt = self.next();
-        // SAFETY: The caller promised that we're not empty, and
-        // `Self: TrustedLen` so we can actually trust the `size_hint`.
-        unsafe { opt.unwrap_unchecked() }
-    }
-}
diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs
index ac096afb38af0..18abdbed5af6f 100644
--- a/library/core/src/slice/iter.rs
+++ b/library/core/src/slice/iter.rs
@@ -5,9 +5,7 @@ mod macros;
 
 use super::{from_raw_parts, from_raw_parts_mut};
 use crate::hint::assert_unchecked;
-use crate::iter::{
-    FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce, UncheckedIterator,
-};
+use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
 use crate::marker::PhantomData;
 use crate::mem::{self, SizedTypeProperties};
 use crate::num::NonZero;
diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs
index 236bdf9d89cae..1c8a4d5ba3c2b 100644
--- a/library/core/src/slice/iter/macros.rs
+++ b/library/core/src/slice/iter/macros.rs
@@ -238,7 +238,7 @@ macro_rules! iterator {
                 // SAFETY: We are in bounds. `post_inc_start` does the right thing even for ZSTs.
                 unsafe {
                     self.post_inc_start(n);
-                    Some(self.next_unchecked())
+                    Some(self.post_inc_start(1).$into_ref())
                 }
             }
 
@@ -481,16 +481,6 @@ macro_rules! iterator {
         #[unstable(feature = "trusted_len", issue = "37572")]
         unsafe impl<T> TrustedLen for $name<'_, T> {}
 
-        impl<'a, T> UncheckedIterator for $name<'a, T> {
-            #[inline]
-            unsafe fn next_unchecked(&mut self) -> $elem {
-                // SAFETY: The caller promised there's at least one more item.
-                unsafe {
-                    self.post_inc_start(1).$into_ref()
-                }
-            }
-        }
-
         #[stable(feature = "default_iters", since = "1.70.0")]
         impl<T> Default for $name<'_, T> {
             /// Creates an empty slice iterator.

From 7905c8fd997c4e32703eb8adc793863707881bc6 Mon Sep 17 00:00:00 2001
From: JafarTano <81587305+JaafarTanoukhi@users.noreply.github.com>
Date: Mon, 18 May 2026 11:14:17 +0300
Subject: [PATCH 75/78] Add diagnostic items for IoBufReader and StdinLock

---
 compiler/rustc_span/src/symbol.rs        | 2 ++
 library/std/src/io/buffered/bufreader.rs | 1 +
 library/std/src/io/stdio.rs              | 1 +
 3 files changed, 4 insertions(+)

diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 69ed7314855f9..49852e006e4c9 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -237,6 +237,7 @@ symbols! {
         IntoFuture,
         IntoIterator,
         IntoIteratorItem,
+        IoBufReader,
         IrTyKind,
         Item,
         ItemContext,
@@ -306,6 +307,7 @@ symbols! {
         Some,
         Source,
         SpanCtxt,
+        StdinLock,
         Str,
         String,
         Struct,
diff --git a/library/std/src/io/buffered/bufreader.rs b/library/std/src/io/buffered/bufreader.rs
index ac461f4f0307e..36bf329a946e0 100644
--- a/library/std/src/io/buffered/bufreader.rs
+++ b/library/std/src/io/buffered/bufreader.rs
@@ -48,6 +48,7 @@ use crate::io::{
 /// }
 /// ```
 #[stable(feature = "rust1", since = "1.0.0")]
+#[cfg_attr(not(test), rustc_diagnostic_item = "IoBufReader")]
 pub struct BufReader<R: ?Sized> {
     buf: Buffer,
     inner: R,
diff --git a/library/std/src/io/stdio.rs b/library/std/src/io/stdio.rs
index 2d80fe49e80a7..845d325e4e5b4 100644
--- a/library/std/src/io/stdio.rs
+++ b/library/std/src/io/stdio.rs
@@ -285,6 +285,7 @@ pub struct Stdin {
 /// ```
 #[must_use = "if unused stdin will immediately unlock"]
 #[stable(feature = "rust1", since = "1.0.0")]
+#[cfg_attr(not(test), rustc_diagnostic_item = "StdinLock")]
 pub struct StdinLock<'a> {
     inner: MutexGuard<'a, BufReader<StdinRaw>>,
 }

From 0a07235472feeb8613fec006a374adf8ee26bde2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gabriel=20Bj=C3=B8rnager=20Jensen?= <gabriel@achernar.io>
Date: Sat, 16 May 2026 19:12:50 +0200
Subject: [PATCH 76/78] Widen 'widening_mul' result; Remove 'widening_mul' from
 'u128' and 'i128';

---
 library/core/src/fmt/num.rs                |  2 +-
 library/core/src/num/imp/diy_float.rs      |  2 +-
 library/core/src/num/int_macros.rs         | 31 +-------------
 library/core/src/num/mod.rs                | 41 ++++++++++++++++++
 library/core/src/num/uint_macros.rs        | 48 ----------------------
 library/coretests/tests/lib.rs             |  1 -
 library/coretests/tests/num/int_macros.rs  |  6 ---
 library/coretests/tests/num/uint_macros.rs |  4 --
 8 files changed, 44 insertions(+), 91 deletions(-)

diff --git a/library/core/src/fmt/num.rs b/library/core/src/fmt/num.rs
index e9302a6127f5c..7b9a5fd60f6f1 100644
--- a/library/core/src/fmt/num.rs
+++ b/library/core/src/fmt/num.rs
@@ -868,7 +868,7 @@ fn div_rem_1e16(n: u128) -> (u128, u64) {
     const M_HIGH: u128 = 76624777043294442917917351357515459181;
     const SH_POST: u8 = 51;
 
-    let quot = n.widening_mul(M_HIGH).1 >> SH_POST;
+    let quot = n.carrying_mul(M_HIGH, 0).1 >> SH_POST;
     let rem = n - quot * D;
     (quot, rem as u64)
 }
diff --git a/library/core/src/num/imp/diy_float.rs b/library/core/src/num/imp/diy_float.rs
index e054e7f3f10a7..439b74b31a78b 100644
--- a/library/core/src/num/imp/diy_float.rs
+++ b/library/core/src/num/imp/diy_float.rs
@@ -22,7 +22,7 @@ pub struct Fp {
 impl Fp {
     /// Returns a correctly rounded product of itself and `other`.
     pub fn mul(self, other: Self) -> Self {
-        let (lo, hi) = self.f.widening_mul(other.f);
+        let (lo, hi) = self.f.carrying_mul(other.f, 0);
         let f = hi + (lo >> 63) /* round */;
         let e = self.e + other.e + 64;
         Self { f, e }
diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs
index 58c499efd55ce..4e7be6f065c9b 100644
--- a/library/core/src/num/int_macros.rs
+++ b/library/core/src/num/int_macros.rs
@@ -2777,32 +2777,6 @@ macro_rules! int_impl {
             (a as Self, b)
         }
 
-        /// Calculates the complete product `self * rhs` without the possibility to overflow.
-        ///
-        /// This returns the low-order (wrapping) bits and the high-order (overflow) bits
-        /// of the result as two separate values, in that order.
-        ///
-        /// If you also need to add a carry to the wide result, then you want
-        /// [`Self::carrying_mul`] instead.
-        ///
-        /// # Examples
-        ///
-        /// Please note that this example is shared among integer types, which is why `i32` is used.
-        ///
-        /// ```
-        /// #![feature(widening_mul)]
-        /// assert_eq!(5i32.widening_mul(-2), (4294967286, -1));
-        /// assert_eq!(1_000_000_000i32.widening_mul(-10), (2884901888, -3));
-        /// ```
-        #[unstable(feature = "widening_mul", issue = "152016")]
-        #[rustc_const_unstable(feature = "widening_mul", issue = "152016")]
-        #[must_use = "this returns the result of the operation, \
-                      without modifying the original"]
-        #[inline]
-        pub const fn widening_mul(self, rhs: Self) -> ($UnsignedT, Self) {
-            Self::carrying_mul_add(self, rhs, 0, 0)
-        }
-
         /// Calculates the "full multiplication" `self * rhs + carry`
         /// without the possibility to overflow.
         ///
@@ -2813,8 +2787,6 @@ macro_rules! int_impl {
         /// additional amount of overflow. This allows for chaining together multiple
         /// multiplications to create "big integers" which represent larger values.
         ///
-        /// If you don't need the `carry`, then you can use [`Self::widening_mul`] instead.
-        ///
         /// # Examples
         ///
         /// Please note that this example is shared among integer types, which is why `i32` is used.
@@ -2849,8 +2821,7 @@ macro_rules! int_impl {
         /// additional amount of overflow. This allows for chaining together multiple
         /// multiplications to create "big integers" which represent larger values.
         ///
-        /// If you don't need either `carry`, then you can use [`Self::widening_mul`] instead,
-        /// and if you only need one `carry`, then you can use [`Self::carrying_mul`] instead.
+        /// If you only need one `carry`, then you can use [`Self::carrying_mul`] instead.
         ///
         /// # Examples
         ///
diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs
index fdd4831336179..ed4624f7c9494 100644
--- a/library/core/src/num/mod.rs
+++ b/library/core/src/num/mod.rs
@@ -240,6 +240,39 @@ macro_rules! midpoint_impl {
     };
 }
 
+macro_rules! widening_mul_impl {
+    ($SelfT:ty, $WideT:ty) => {
+        /// Widening multiplication. Computes `self * rhs`, widening to a larger integer.
+        ///
+        /// The returned value is always exact and can never overflow.
+        ///
+        /// Note that this method is semantically equivalent to [`carrying_mul`] with a
+        /// carry of zero, with the latter instead returning a tuple denoting the low and
+        /// high parts of the result. Consider using it instead if you need
+        /// interoperability with other big int helper functions, or if this method isn't
+        /// available for a given type.
+        ///
+        /// [`carrying_mul`]: Self::carrying_mul
+        ///
+        /// # Examples
+        ///
+        /// ```
+        /// #![feature(widening_mul)]
+        ///
+        #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MAX.widening_mul(0_", stringify!($SelfT), "), 0);")]
+        #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MAX.widening_mul(", stringify!($SelfT), "::MAX), ", stringify!($SelfT), "::MAX as ", stringify!($WideT), " * ", stringify!($SelfT), "::MAX as ", stringify!($WideT), ");")]
+        /// ```
+        #[unstable(feature = "widening_mul", issue = "152016")]
+        #[rustc_const_unstable(feature = "widening_mul", issue = "152016")]
+        #[must_use = "this returns the result of the operation, \
+                      without modifying the original"]
+        #[inline]
+        pub const fn widening_mul(self, rhs: Self) -> $WideT {
+            self as $WideT * rhs as $WideT
+        }
+    }
+}
+
 macro_rules! widening_carryless_mul_impl {
     ($SelfT:ty, $WideT:ty) => {
         /// Performs a widening carry-less multiplication.
@@ -360,6 +393,7 @@ impl i8 {
         bound_condition = "",
     }
     midpoint_impl! { i8, i16, signed }
+    widening_mul_impl! { i8, i16 }
 }
 
 impl i16 {
@@ -384,6 +418,7 @@ impl i16 {
         bound_condition = "",
     }
     midpoint_impl! { i16, i32, signed }
+    widening_mul_impl! { i16, i32 }
 }
 
 impl i32 {
@@ -408,6 +443,7 @@ impl i32 {
         bound_condition = "",
     }
     midpoint_impl! { i32, i64, signed }
+    widening_mul_impl! { i32, i64 }
 }
 
 impl i64 {
@@ -432,6 +468,7 @@ impl i64 {
         bound_condition = "",
     }
     midpoint_impl! { i64, signed }
+    widening_mul_impl! { i64, i128 }
 }
 
 impl i128 {
@@ -568,6 +605,7 @@ impl u8 {
         bound_condition = "",
     }
     midpoint_impl! { u8, u16, unsigned }
+    widening_mul_impl! { u8, u16 }
     widening_carryless_mul_impl! { u8, u16 }
     carrying_carryless_mul_impl! { u8, u16 }
 
@@ -1215,6 +1253,7 @@ impl u16 {
         bound_condition = "",
     }
     midpoint_impl! { u16, u32, unsigned }
+    widening_mul_impl! { u16, u32 }
     widening_carryless_mul_impl! { u16, u32 }
     carrying_carryless_mul_impl! { u16, u32 }
 
@@ -1270,6 +1309,7 @@ impl u32 {
         bound_condition = "",
     }
     midpoint_impl! { u32, u64, unsigned }
+    widening_mul_impl! { u32, u64 }
     widening_carryless_mul_impl! { u32, u64 }
     carrying_carryless_mul_impl! { u32, u64 }
 }
@@ -1301,6 +1341,7 @@ impl u64 {
         bound_condition = "",
     }
     midpoint_impl! { u64, u128, unsigned }
+    widening_mul_impl! { u64, u128 }
     widening_carryless_mul_impl! { u64, u128 }
     carrying_carryless_mul_impl! { u64, u128 }
 }
diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs
index 8dc668584b658..c12b84db56d1f 100644
--- a/library/core/src/num/uint_macros.rs
+++ b/library/core/src/num/uint_macros.rs
@@ -3177,54 +3177,6 @@ macro_rules! uint_impl {
             (a as Self, b)
         }
 
-        /// Calculates the complete double-width product `self * rhs`.
-        ///
-        /// This returns the low-order (wrapping) bits and the high-order (overflow) bits
-        /// of the result as two separate values, in that order. As such,
-        /// `a.widening_mul(b).0` produces the same result as `a.wrapping_mul(b)`.
-        ///
-        /// If you also need to add a value and carry to the wide result, then you want
-        /// [`Self::carrying_mul_add`] instead.
-        ///
-        /// If you also need to add a carry to the wide result, then you want
-        /// [`Self::carrying_mul`] instead.
-        ///
-        /// If you just want to know *whether* the multiplication overflowed, then you
-        /// want [`Self::overflowing_mul`] instead.
-        ///
-        /// # Examples
-        ///
-        /// ```
-        /// #![feature(widening_mul)]
-        #[doc = concat!("assert_eq!(5_", stringify!($SelfT), ".widening_mul(7), (35, 0));")]
-        #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MAX.widening_mul(", stringify!($SelfT), "::MAX), (1, ", stringify!($SelfT), "::MAX - 1));")]
-        /// ```
-        ///
-        /// Compared to other `*_mul` methods:
-        /// ```
-        /// #![feature(widening_mul)]
-        #[doc = concat!("assert_eq!(", stringify!($SelfT), "::widening_mul(1 << ", stringify!($BITS_MINUS_ONE), ", 6), (0, 3));")]
-        #[doc = concat!("assert_eq!(", stringify!($SelfT), "::overflowing_mul(1 << ", stringify!($BITS_MINUS_ONE), ", 6), (0, true));")]
-        #[doc = concat!("assert_eq!(", stringify!($SelfT), "::wrapping_mul(1 << ", stringify!($BITS_MINUS_ONE), ", 6), 0);")]
-        #[doc = concat!("assert_eq!(", stringify!($SelfT), "::checked_mul(1 << ", stringify!($BITS_MINUS_ONE), ", 6), None);")]
-        /// ```
-        ///
-        /// Please note that this example is shared among integer types, which is why `u32` is used.
-        ///
-        /// ```
-        /// #![feature(widening_mul)]
-        /// assert_eq!(5u32.widening_mul(2), (10, 0));
-        /// assert_eq!(1_000_000_000u32.widening_mul(10), (1410065408, 2));
-        /// ```
-        #[unstable(feature = "widening_mul", issue = "152016")]
-        #[rustc_const_unstable(feature = "widening_mul", issue = "152016")]
-        #[must_use = "this returns the result of the operation, \
-                      without modifying the original"]
-        #[inline]
-        pub const fn widening_mul(self, rhs: Self) -> (Self, Self) {
-            Self::carrying_mul_add(self, rhs, 0, 0)
-        }
-
         /// Calculates the "full multiplication" `self * rhs + carry`
         /// without the possibility to overflow.
         ///
diff --git a/library/coretests/tests/lib.rs b/library/coretests/tests/lib.rs
index 12b81fea9d27c..18e09c707ebad 100644
--- a/library/coretests/tests/lib.rs
+++ b/library/coretests/tests/lib.rs
@@ -126,7 +126,6 @@
 #![feature(unicode_internals)]
 #![feature(unsize)]
 #![feature(unwrap_infallible)]
-#![feature(widening_mul)]
 // tidy-alphabetical-end
 #![allow(internal_features)]
 #![deny(fuzzy_provenance_casts)]
diff --git a/library/coretests/tests/num/int_macros.rs b/library/coretests/tests/num/int_macros.rs
index 2b641f595eba4..1334881aad68d 100644
--- a/library/coretests/tests/num/int_macros.rs
+++ b/library/coretests/tests/num/int_macros.rs
@@ -433,12 +433,6 @@ macro_rules! int_module {
                 assert_eq_const_safe!(($T, bool): (0 as $T).borrowing_sub(MIN, true), (MAX, false));
             }
 
-            fn test_widening_mul() {
-                assert_eq_const_safe!(($U, $T): MAX.widening_mul(MAX), (1, MAX / 2));
-                assert_eq_const_safe!(($U, $T): MIN.widening_mul(MAX), (MIN as $U, MIN / 2));
-                assert_eq_const_safe!(($U, $T): MIN.widening_mul(MIN), (0, MAX / 2 + 1));
-            }
-
             fn test_carrying_mul() {
                 assert_eq_const_safe!(($U, $T): MAX.carrying_mul(MAX, 0), (1, MAX / 2));
                 assert_eq_const_safe!(($U, $T):
diff --git a/library/coretests/tests/num/uint_macros.rs b/library/coretests/tests/num/uint_macros.rs
index 8dfa0924bd369..b686df0d14f2d 100644
--- a/library/coretests/tests/num/uint_macros.rs
+++ b/library/coretests/tests/num/uint_macros.rs
@@ -504,10 +504,6 @@ macro_rules! uint_module {
                 assert_eq_const_safe!(($T, bool): $T::MAX.borrowing_sub($T::MAX, true), ($T::MAX, true));
             }
 
-            fn test_widening_mul() {
-                assert_eq_const_safe!(($T, $T): $T::MAX.widening_mul($T::MAX), (1, $T::MAX - 1));
-            }
-
             fn test_carrying_mul() {
                 assert_eq_const_safe!(($T, $T): $T::MAX.carrying_mul($T::MAX, 0), (1, $T::MAX - 1));
                 assert_eq_const_safe!(($T, $T): $T::MAX.carrying_mul($T::MAX, $T::MAX), (0, $T::MAX));

From 7450bf099e59d935b31f30d9c4a8065ba522167c Mon Sep 17 00:00:00 2001
From: cyrgani <ansgar.w.zielke@gmail.com>
Date: Thu, 14 May 2026 09:23:48 +0000
Subject: [PATCH 77/78] reduce usage of `box_patterns` in tests

---
 tests/incremental/lint-unused-features.rs     |  6 ---
 .../main.rs                                   |  2 +-
 .../rmake.rs                                  |  2 +-
 .../unstable-feature-usage-metrics/lib.rs     |  2 +-
 .../unstable-feature-usage-metrics/rmake.rs   |  2 +-
 tests/ui/binding/match-unique-bind.rs         | 11 -----
 tests/ui/box/unit/unique-destructure.rs       |  9 ----
 tests/ui/box/unit/unique-pat-2.rs             | 18 -------
 tests/ui/deref/derefmut-closure-drop-order.rs | 47 -------------------
 .../unused-features/used-language-features.rs |  6 ---
 10 files changed, 4 insertions(+), 101 deletions(-)
 delete mode 100644 tests/ui/binding/match-unique-bind.rs
 delete mode 100644 tests/ui/box/unit/unique-destructure.rs
 delete mode 100644 tests/ui/box/unit/unique-pat-2.rs
 delete mode 100644 tests/ui/deref/derefmut-closure-drop-order.rs

diff --git a/tests/incremental/lint-unused-features.rs b/tests/incremental/lint-unused-features.rs
index 898bb19326018..0bf9730727f74 100644
--- a/tests/incremental/lint-unused-features.rs
+++ b/tests/incremental/lint-unused-features.rs
@@ -4,7 +4,6 @@
 #![deny(unused_features)]
 
 // Used language features
-#![feature(box_patterns)]
 #![feature(decl_macro)]
 #![cfg_attr(all(), feature(rustc_attrs))]
 
@@ -14,11 +13,6 @@
 #![cfg_attr(all(), feature(allocator_api))]
 //[bfail]~^ ERROR feature `allocator_api` is declared but not used
 
-pub fn use_box_patterns(b: Box<i32>) -> i32 {
-    let box x = b;
-    x
-}
-
 macro m() {}
 pub fn use_decl_macro() {
     m!();
diff --git a/tests/run-make/unstable-feature-usage-metrics-incremental/main.rs b/tests/run-make/unstable-feature-usage-metrics-incremental/main.rs
index f970d395b2cf2..ca33275e3e389 100644
--- a/tests/run-make/unstable-feature-usage-metrics-incremental/main.rs
+++ b/tests/run-make/unstable-feature-usage-metrics-incremental/main.rs
@@ -1,5 +1,5 @@
 #![feature(ascii_char)] // random lib feature
-#![feature(box_patterns)] // random lang feature
+#![feature(test_unstable_lint)] // random lang feature
 
 // picked arbitrary unstable features, just need a random lib and lang feature, ideally ones that
 // won't be stabilized any time soon so we don't have to update this test
diff --git a/tests/run-make/unstable-feature-usage-metrics-incremental/rmake.rs b/tests/run-make/unstable-feature-usage-metrics-incremental/rmake.rs
index 862b2bd5300c3..61ad07717b658 100644
--- a/tests/run-make/unstable-feature-usage-metrics-incremental/rmake.rs
+++ b/tests/run-make/unstable-feature-usage-metrics-incremental/rmake.rs
@@ -69,7 +69,7 @@ fn test_metrics_dump() {
         let expected = serde_json::json!(
             {
                 "lib_features":[{"symbol":"ascii_char", "timestamp":null}],
-                "lang_features":[{"symbol":"box_patterns","since":null, "timestamp":null}]
+                "lang_features":[{"symbol":"test_unstable_lint","since":null, "timestamp":null}]
             }
         );
 
diff --git a/tests/run-make/unstable-feature-usage-metrics/lib.rs b/tests/run-make/unstable-feature-usage-metrics/lib.rs
index 2202d722c497e..488c00045c454 100644
--- a/tests/run-make/unstable-feature-usage-metrics/lib.rs
+++ b/tests/run-make/unstable-feature-usage-metrics/lib.rs
@@ -1,5 +1,5 @@
 #![feature(ascii_char)] // random lib feature
-#![feature(box_patterns)] // random lang feature
+#![feature(test_unstable_lint)] // random lang feature
 
 // picked arbitrary unstable features, just need a random lib and lang feature, ideally ones that
 // won't be stabilized any time soon so we don't have to update this test
diff --git a/tests/run-make/unstable-feature-usage-metrics/rmake.rs b/tests/run-make/unstable-feature-usage-metrics/rmake.rs
index f987829741c72..85ea50e7065ab 100644
--- a/tests/run-make/unstable-feature-usage-metrics/rmake.rs
+++ b/tests/run-make/unstable-feature-usage-metrics/rmake.rs
@@ -67,7 +67,7 @@ fn test_metrics_dump() {
         let expected = serde_json::json!(
             {
                 "lib_features":[{"symbol":"ascii_char", "timestamp":null}],
-                "lang_features":[{"symbol":"box_patterns","since":null, "timestamp":null}]
+                "lang_features":[{"symbol":"test_unstable_lint","since":null, "timestamp":null}]
             }
         );
 
diff --git a/tests/ui/binding/match-unique-bind.rs b/tests/ui/binding/match-unique-bind.rs
deleted file mode 100644
index 02f1945f5718f..0000000000000
--- a/tests/ui/binding/match-unique-bind.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-//@ run-pass
-#![feature(box_patterns)]
-
-pub fn main() {
-    match Box::new(100) {
-      box x => {
-        println!("{}", x);
-        assert_eq!(x, 100);
-      }
-    }
-}
diff --git a/tests/ui/box/unit/unique-destructure.rs b/tests/ui/box/unit/unique-destructure.rs
deleted file mode 100644
index 2ddb3c452cd59..0000000000000
--- a/tests/ui/box/unit/unique-destructure.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-//@ run-pass
-#![feature(box_patterns)]
-
-struct Foo { a: isize, b: isize }
-
-pub fn main() {
-    let box Foo{ a, b } = Box::new(Foo { a: 100, b: 200 });
-    assert_eq!(a + b, 300);
-}
diff --git a/tests/ui/box/unit/unique-pat-2.rs b/tests/ui/box/unit/unique-pat-2.rs
deleted file mode 100644
index 85f0fbd5e4f13..0000000000000
--- a/tests/ui/box/unit/unique-pat-2.rs
+++ /dev/null
@@ -1,18 +0,0 @@
-//@ run-pass
-#![allow(dead_code)]
-#![allow(non_camel_case_types)]
-#![allow(non_shorthand_field_patterns)]
-
-#![feature(box_patterns)]
-
-struct Foo {a: isize, b: usize}
-
-enum bar { u(Box<Foo>), w(isize), }
-
-pub fn main() {
-    let v = match bar::u(Box::new(Foo{ a: 10, b: 40 })) {
-        bar::u(box Foo{ a: a, b: b }) => { a + (b as isize) }
-        _ => { 66 }
-    };
-    assert_eq!(v, 50);
-}
diff --git a/tests/ui/deref/derefmut-closure-drop-order.rs b/tests/ui/deref/derefmut-closure-drop-order.rs
deleted file mode 100644
index b95ae68f1734f..0000000000000
--- a/tests/ui/deref/derefmut-closure-drop-order.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-//! Regression test for https://github.com/rust-lang/rust/issues/16774
-
-//@ run-pass
-#![feature(box_patterns)]
-
-use std::ops::{Deref, DerefMut};
-
-struct X(Box<isize>);
-
-static mut DESTRUCTOR_RAN: bool = false;
-
-impl Drop for X {
-    fn drop(&mut self) {
-        unsafe {
-            assert!(!DESTRUCTOR_RAN);
-            DESTRUCTOR_RAN = true;
-        }
-    }
-}
-
-impl Deref for X {
-    type Target = isize;
-
-    fn deref(&self) -> &isize {
-        let &X(box ref x) = self;
-        x
-    }
-}
-
-impl DerefMut for X {
-    fn deref_mut(&mut self) -> &mut isize {
-        let &mut X(box ref mut x) = self;
-        x
-    }
-}
-
-fn main() {
-    {
-        let mut test = X(Box::new(5));
-        {
-            let mut change = || { *test = 10 };
-            change();
-        }
-        assert_eq!(*test, 10);
-    }
-    assert!(unsafe { DESTRUCTOR_RAN });
-}
diff --git a/tests/ui/lint/unused-features/used-language-features.rs b/tests/ui/lint/unused-features/used-language-features.rs
index 7da4866a00d15..ddc40ce2e96b9 100644
--- a/tests/ui/lint/unused-features/used-language-features.rs
+++ b/tests/ui/lint/unused-features/used-language-features.rs
@@ -4,15 +4,9 @@
 #![deny(unused_features)]
 
 // Used language features
-#![feature(box_patterns)]
 #![feature(decl_macro)]
 #![cfg_attr(all(), feature(rustc_attrs))]
 
-pub fn use_box_patterns(b: Box<i32>) -> i32 {
-    let box x = b;
-    x
-}
-
 macro m() {}
 pub fn use_decl_macro() {
     m!();

From 450cdb550130dfaab2c29500d5e1b4d0093f7b09 Mon Sep 17 00:00:00 2001
From: daxpedda <daxpedda@gmail.com>
Date: Thu, 14 May 2026 20:05:09 +0200
Subject: [PATCH 78/78] Allow `global_asm!` in statement positions

---
 compiler/rustc_builtin_macros/src/asm.rs      | 42 ++++++++++----
 tests/ui/asm/naked-functions.rs               |  8 ++-
 tests/ui/asm/naked-functions.stderr           | 56 ++++++++++---------
 tests/ui/asm/statement-global-asm-error.rs    | 13 +++++
 .../ui/asm/statement-global-asm-error.stderr  | 14 +++++
 tests/ui/asm/statement-global-asm.rs          |  8 +++
 6 files changed, 104 insertions(+), 37 deletions(-)
 create mode 100644 tests/ui/asm/statement-global-asm-error.rs
 create mode 100644 tests/ui/asm/statement-global-asm-error.stderr
 create mode 100644 tests/ui/asm/statement-global-asm.rs

diff --git a/compiler/rustc_builtin_macros/src/asm.rs b/compiler/rustc_builtin_macros/src/asm.rs
index a1e14b5245137..d779089eeaa4c 100644
--- a/compiler/rustc_builtin_macros/src/asm.rs
+++ b/compiler/rustc_builtin_macros/src/asm.rs
@@ -10,7 +10,7 @@ use rustc_parse_format as parse;
 use rustc_session::lint;
 use rustc_span::{ErrorGuaranteed, InnerSpan, Span, Symbol, sym};
 use rustc_target::asm::InlineAsmArch;
-use smallvec::smallvec;
+use smallvec::{SmallVec, smallvec};
 
 use crate::errors;
 use crate::util::{ExprToSpannedString, expr_to_spanned_string};
@@ -26,6 +26,24 @@ struct ValidatedAsmArgs {
     pub options_spans: Vec<Span>,
 }
 
+struct MacGlobalAsm {
+    item: ast::Item,
+}
+
+impl MacResult for MacGlobalAsm {
+    fn make_items(self: Box<Self>) -> Option<SmallVec<[Box<ast::Item>; 1]>> {
+        Some(smallvec![Box::new(self.item)])
+    }
+
+    fn make_stmts(self: Box<Self>) -> Option<SmallVec<[ast::Stmt; 1]>> {
+        Some(smallvec![ast::Stmt {
+            id: ast::DUMMY_NODE_ID,
+            span: self.item.span,
+            kind: ast::StmtKind::Item(Box::new(self.item)),
+        }])
+    }
+}
+
 fn parse_args<'a>(
     ecx: &ExtCtxt<'a>,
     sp: Span,
@@ -650,18 +668,20 @@ pub(super) fn expand_global_asm<'cx>(
                 return ExpandResult::Retry(());
             };
             match mac {
-                Ok(inline_asm) => MacEager::items(smallvec![Box::new(ast::Item {
-                    attrs: ast::AttrVec::new(),
-                    id: ast::DUMMY_NODE_ID,
-                    kind: ast::ItemKind::GlobalAsm(Box::new(inline_asm)),
-                    vis: ast::Visibility {
-                        span: sp.shrink_to_lo(),
-                        kind: ast::VisibilityKind::Inherited,
+                Ok(inline_asm) => Box::new(MacGlobalAsm {
+                    item: ast::Item {
+                        attrs: ast::AttrVec::new(),
+                        id: ast::DUMMY_NODE_ID,
+                        kind: ast::ItemKind::GlobalAsm(Box::new(inline_asm)),
+                        vis: ast::Visibility {
+                            span: sp.shrink_to_lo(),
+                            kind: ast::VisibilityKind::Inherited,
+                            tokens: None,
+                        },
+                        span: sp,
                         tokens: None,
                     },
-                    span: sp,
-                    tokens: None,
-                })]),
+                }),
                 Err(guar) => DummyResult::any(sp, guar),
             }
         }
diff --git a/tests/ui/asm/naked-functions.rs b/tests/ui/asm/naked-functions.rs
index 55f2f552ad31c..abe26ca91e5d8 100644
--- a/tests/ui/asm/naked-functions.rs
+++ b/tests/ui/asm/naked-functions.rs
@@ -6,7 +6,7 @@
 #![feature(asm_unwind, linkage, rustc_attrs, cfg_target_object_format)]
 #![crate_type = "lib"]
 
-use std::arch::{asm, naked_asm};
+use std::arch::{asm, global_asm, naked_asm};
 
 #[unsafe(naked)]
 pub extern "C" fn inline_asm_macro() {
@@ -14,6 +14,12 @@ pub extern "C" fn inline_asm_macro() {
     //~^ERROR the `asm!` macro is not allowed in naked functions
 }
 
+#[unsafe(naked)]
+pub extern "C" fn global_asm_macro() {
+    //~^ERROR naked functions must contain a single `naked_asm!` invocation
+    global_asm!("");
+}
+
 #[repr(C)]
 pub struct P {
     x: u8,
diff --git a/tests/ui/asm/naked-functions.stderr b/tests/ui/asm/naked-functions.stderr
index 2b67c3aecd73c..e8963f7e1d52c 100644
--- a/tests/ui/asm/naked-functions.stderr
+++ b/tests/ui/asm/naked-functions.stderr
@@ -1,71 +1,71 @@
 error: the `in` operand cannot be used with `naked_asm!`
-  --> $DIR/naked-functions.rs:47:29
+  --> $DIR/naked-functions.rs:53:29
    |
 LL |     naked_asm!("/* {0} */", in(reg) a)
    |                             ^^ the `in` operand is not meaningful for global-scoped inline assembly, remove it
 
 error: the `in` operand cannot be used with `naked_asm!`
-  --> $DIR/naked-functions.rs:68:10
+  --> $DIR/naked-functions.rs:74:10
    |
 LL |          in(reg) a,
    |          ^^ the `in` operand is not meaningful for global-scoped inline assembly, remove it
 
 error: the `noreturn` option cannot be used with `naked_asm!`
-  --> $DIR/naked-functions.rs:88:28
+  --> $DIR/naked-functions.rs:94:28
    |
 LL |     naked_asm!("", options(noreturn));
    |                            ^^^^^^^^ the `noreturn` option is not meaningful for global-scoped inline assembly
 
 error: the `nomem` option cannot be used with `naked_asm!`
-  --> $DIR/naked-functions.rs:105:28
+  --> $DIR/naked-functions.rs:111:28
    |
 LL |     naked_asm!("", options(nomem, preserves_flags));
    |                            ^^^^^ the `nomem` option is not meaningful for global-scoped inline assembly
 
 error: the `preserves_flags` option cannot be used with `naked_asm!`
-  --> $DIR/naked-functions.rs:105:35
+  --> $DIR/naked-functions.rs:111:35
    |
 LL |     naked_asm!("", options(nomem, preserves_flags));
    |                                   ^^^^^^^^^^^^^^^ the `preserves_flags` option is not meaningful for global-scoped inline assembly
 
 error: the `readonly` option cannot be used with `naked_asm!`
-  --> $DIR/naked-functions.rs:112:28
+  --> $DIR/naked-functions.rs:118:28
    |
 LL |     naked_asm!("", options(readonly, nostack), options(pure));
    |                            ^^^^^^^^ the `readonly` option is not meaningful for global-scoped inline assembly
 
 error: the `nostack` option cannot be used with `naked_asm!`
-  --> $DIR/naked-functions.rs:112:38
+  --> $DIR/naked-functions.rs:118:38
    |
 LL |     naked_asm!("", options(readonly, nostack), options(pure));
    |                                      ^^^^^^^ the `nostack` option is not meaningful for global-scoped inline assembly
 
 error: the `pure` option cannot be used with `naked_asm!`
-  --> $DIR/naked-functions.rs:112:56
+  --> $DIR/naked-functions.rs:118:56
    |
 LL |     naked_asm!("", options(readonly, nostack), options(pure));
    |                                                        ^^^^ the `pure` option is not meaningful for global-scoped inline assembly
 
 error: the `may_unwind` option cannot be used with `naked_asm!`
-  --> $DIR/naked-functions.rs:120:28
+  --> $DIR/naked-functions.rs:126:28
    |
 LL |     naked_asm!("", options(may_unwind));
    |                            ^^^^^^^^^^ the `may_unwind` option is not meaningful for global-scoped inline assembly
 
 error: this is a user specified error
-  --> $DIR/naked-functions.rs:151:5
+  --> $DIR/naked-functions.rs:157:5
    |
 LL |     compile_error!("this is a user specified error")
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 error: this is a user specified error
-  --> $DIR/naked-functions.rs:157:5
+  --> $DIR/naked-functions.rs:163:5
    |
 LL |     compile_error!("this is a user specified error");
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 error: asm template must be a string literal
-  --> $DIR/naked-functions.rs:164:16
+  --> $DIR/naked-functions.rs:170:16
    |
 LL |     naked_asm!(invalid_syntax)
    |                ^^^^^^^^^^^^^^
@@ -76,32 +76,38 @@ error[E0787]: the `asm!` macro is not allowed in naked functions
 LL |     unsafe { asm!("", options(raw)) };
    |              ^^^^^^^^^^^^^^^^^^^^^^ consider using the `naked_asm!` macro instead
 
+error[E0787]: naked functions must contain a single `naked_asm!` invocation
+  --> $DIR/naked-functions.rs:18:1
+   |
+LL | pub extern "C" fn global_asm_macro() {
+   | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
 error: patterns not allowed in naked function parameters
-  --> $DIR/naked-functions.rs:25:5
+  --> $DIR/naked-functions.rs:31:5
    |
 LL |     mut a: u32,
    |     ^^^^^
 
 error: patterns not allowed in naked function parameters
-  --> $DIR/naked-functions.rs:27:5
+  --> $DIR/naked-functions.rs:33:5
    |
 LL |     &b: &i32,
    |     ^^
 
 error: patterns not allowed in naked function parameters
-  --> $DIR/naked-functions.rs:29:6
+  --> $DIR/naked-functions.rs:35:6
    |
 LL |     (None | Some(_)): Option<std::ptr::NonNull<u8>>,
    |      ^^^^^^^^^^^^^^
 
 error: patterns not allowed in naked function parameters
-  --> $DIR/naked-functions.rs:31:5
+  --> $DIR/naked-functions.rs:37:5
    |
 LL |     P { x, y }: P,
    |     ^^^^^^^^^^
 
 error: referencing function parameters is not allowed in naked functions
-  --> $DIR/naked-functions.rs:40:5
+  --> $DIR/naked-functions.rs:46:5
    |
 LL |     a + 1
    |     ^
@@ -109,7 +115,7 @@ LL |     a + 1
    = help: follow the calling convention in asm block to use parameters
 
 error[E0787]: naked functions must contain a single `naked_asm!` invocation
-  --> $DIR/naked-functions.rs:38:1
+  --> $DIR/naked-functions.rs:44:1
    |
 LL | pub extern "C" fn inc(a: u32) -> u32 {
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -118,7 +124,7 @@ LL |     a + 1
    |     ----- not allowed in naked functions
 
 error[E0787]: naked functions must contain a single `naked_asm!` invocation
-  --> $DIR/naked-functions.rs:52:1
+  --> $DIR/naked-functions.rs:58:1
    |
 LL | pub extern "C" fn inc_closure(a: u32) -> u32 {
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -127,7 +133,7 @@ LL |     (|| a + 1)()
    |     ------------ not allowed in naked functions
 
 error[E0787]: naked functions must contain a single `naked_asm!` invocation
-  --> $DIR/naked-functions.rs:58:1
+  --> $DIR/naked-functions.rs:64:1
    |
 LL | pub extern "C" fn unsupported_operands() {
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -144,13 +150,13 @@ LL |     let mut e = 0usize;
    |     ------------------- not allowed in naked functions
 
 error[E0787]: naked functions must contain a single `naked_asm!` invocation
-  --> $DIR/naked-functions.rs:80:1
+  --> $DIR/naked-functions.rs:86:1
    |
 LL | pub extern "C" fn missing_assembly() {
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 error[E0787]: naked functions must contain a single `naked_asm!` invocation
-  --> $DIR/naked-functions.rs:85:1
+  --> $DIR/naked-functions.rs:91:1
    |
 LL | pub extern "C" fn too_many_asm_blocks() {
    | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -159,7 +165,7 @@ LL |     naked_asm!("");
    |     -------------- multiple `naked_asm!` invocations are not allowed in naked functions
 
 error: referencing function parameters is not allowed in naked functions
-  --> $DIR/naked-functions.rs:97:11
+  --> $DIR/naked-functions.rs:103:11
    |
 LL |         *&y
    |           ^
@@ -167,7 +173,7 @@ LL |         *&y
    = help: follow the calling convention in asm block to use parameters
 
 error[E0787]: naked functions must contain a single `naked_asm!` invocation
-  --> $DIR/naked-functions.rs:95:5
+  --> $DIR/naked-functions.rs:101:5
    |
 LL |     pub extern "C" fn inner(y: usize) -> usize {
    |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -175,6 +181,6 @@ LL |
 LL |         *&y
    |         --- not allowed in naked functions
 
-error: aborting due to 25 previous errors
+error: aborting due to 26 previous errors
 
 For more information about this error, try `rustc --explain E0787`.
diff --git a/tests/ui/asm/statement-global-asm-error.rs b/tests/ui/asm/statement-global-asm-error.rs
new file mode 100644
index 0000000000000..f32ba73357a19
--- /dev/null
+++ b/tests/ui/asm/statement-global-asm-error.rs
@@ -0,0 +1,13 @@
+//@ needs-asm-support
+
+use std::arch::global_asm;
+
+fn main() {
+    let x = 42;
+    global_asm!("{}", in(x));
+    //~^ ERROR the `in` operand cannot be used with `global_asm!`
+    //~^^ NOTE the `in` operand is not meaningful for global-scoped inline assembly, remove it
+
+    let y = global_asm!("");
+    //~^ ERROR non-expression macro in expression position: global_asm
+}
diff --git a/tests/ui/asm/statement-global-asm-error.stderr b/tests/ui/asm/statement-global-asm-error.stderr
new file mode 100644
index 0000000000000..35dde6ac1a781
--- /dev/null
+++ b/tests/ui/asm/statement-global-asm-error.stderr
@@ -0,0 +1,14 @@
+error: the `in` operand cannot be used with `global_asm!`
+  --> $DIR/statement-global-asm-error.rs:7:23
+   |
+LL |     global_asm!("{}", in(x));
+   |                       ^^ the `in` operand is not meaningful for global-scoped inline assembly, remove it
+
+error: non-expression macro in expression position: global_asm
+  --> $DIR/statement-global-asm-error.rs:11:13
+   |
+LL |     let y = global_asm!("");
+   |             ^^^^^^^^^^^^^^^
+
+error: aborting due to 2 previous errors
+
diff --git a/tests/ui/asm/statement-global-asm.rs b/tests/ui/asm/statement-global-asm.rs
new file mode 100644
index 0000000000000..597495546d955
--- /dev/null
+++ b/tests/ui/asm/statement-global-asm.rs
@@ -0,0 +1,8 @@
+//@ needs-asm-support
+//@ run-pass
+
+use std::arch::global_asm;
+
+fn main() {
+    global_asm!("");
+}